Difference between revisions of "Fabry:Sequence alignments (sequence searches and multiple alignments)/Scripts"
From Bioinformatikpedia
Rackersederj (talk | contribs) (→Sequence searches) |
Rackersederj (talk | contribs) (→HHblits) |
||
Line 261: | Line 261: | ||
=== HHblits === |
=== HHblits === |
||
+ | >extract_evalues_hhblits.pl |
||
+ | #!/bin/perl |
||
+ | |||
+ | my $Z = 0.003; |
||
+ | my $filename = $ARGV[0]; |
||
+ | my $outEval = $ARGV[0] . "_Evalues.txt"; |
||
+ | open IN, $filename or die $!; |
||
+ | open(OUT2, ">" . $outEval); |
||
+ | |||
+ | while(<IN>) { |
||
+ | if($_ =~ /^\s*\d+\sUP20/){ |
||
+ | # print $_; |
||
+ | chomp; |
||
+ | |||
+ | $_ =~ /.{41}(.{7}).+/; |
||
+ | $Evalue = $1; |
||
+ | |||
+ | #print $Evalue ."\t" . $_ ."\n"; |
||
+ | |||
+ | $Evalue =~ s/^\s+//; |
||
+ | $Evalue =~ s/\s+$//; |
||
+ | |||
+ | if( $Evalue > 0.003 ) {} |
||
+ | else { |
||
+ | if($Evalue eq "0.0"){ |
||
+ | print OUT2 "-Inf\n"; |
||
+ | } |
||
+ | elsif($Evalue =~ /(.*)E(.+)/){ |
||
+ | $Evalue =~ /(.*)E(.+)/; |
||
+ | print OUT2 $2 ."\n"; |
||
+ | } |
||
+ | else { |
||
+ | print OUT2 log($Evalue)/log(10) . "\n"; |
||
+ | } |
||
+ | } |
||
+ | } |
||
+ | } |
||
+ | |||
+ | >parse_hhblits.pl |
||
+ | #!/usr/bin/perl |
||
+ | |||
+ | use strict; |
||
+ | use warnings; |
||
+ | |||
+ | my $bool = 0; |
||
+ | my $ID; |
||
+ | my $Evalue; |
||
+ | my $Identities; |
||
+ | my $Similarity; |
||
+ | |||
+ | open(IN, "< " .$ARGV[0]) or die $!; |
||
+ | open(OUT, "> " .$ARGV[0] . "_ident_cov.tsv"); |
||
+ | print OUT "ID\tEvalue\tIdentities\tSimilarity\n"; |
||
+ | while (<IN>) { |
||
+ | chomp; |
||
+ | if($_ =~ /^>/) { |
||
+ | my @array = split('\|', $_); |
||
+ | $array[4] =~ s/\.$//; |
||
+ | my @array2 = split('\ ', $array[4]); |
||
+ | $ID = $array2[0]; |
||
+ | $bool = 1; |
||
+ | } |
||
+ | elsif ($_ =~ /^\s*Probab/ && $bool == 1){ |
||
+ | $_ =~ /Probab=.+E-value=(.+)\s+Score=.+Aligned_cols=.+Identities=(\d+).+Similarity=(.+)\s+Sum_probs=.+/; |
||
+ | $Evalue = $1; |
||
+ | |||
+ | $Identities = $2; |
||
+ | $Similarity = $3; |
||
+ | |||
+ | $Evalue =~ s/^\s+//; |
||
+ | $Evalue =~ s/\s+$//; |
||
+ | if($Evalue eq "0.0"){ |
||
+ | $Evalue = "-Inf"; |
||
+ | } |
||
+ | elsif($Evalue =~ /(.*)e(.+)/){ |
||
+ | $Evalue =~ /(.*)e(.+)/; |
||
+ | $Evalue = $2; |
||
+ | } |
||
+ | else { |
||
+ | $Evalue = log($Evalue)/log(10); |
||
+ | } |
||
+ | if( $Evalue < -2.39794000867204 ) { |
||
+ | print OUT "$ID\t$Evalue\t$Identities\t$Similarity\n"; |
||
+ | $bool = 0; |
||
+ | } |
||
+ | } |
||
+ | } |
||
+ | close(IN); |
||
+ | |||
+ | |||
+ | > hist_hhblits.R |
||
+ | table <- read.table(file="hhblits_default_ids_GOterms_comparison.txt") |
||
+ | |||
+ | png(file="hhblits_default_ids_GOterms_comparison.png", width = 480, height = 700) |
||
+ | layout(matrix(data=c(1,2), nrow=2, ncol=1), widths=c(1,1), heights=c(3,3)) |
||
+ | |||
+ | hx_1 <- hist(table[,1], main = "Common GO classifications vs. # GO terms P06280\n(2 iterations - default)", xlab ="Percentage in common", xaxt = "n", ylim=c(0,200), xlim = c(0,1), col = "dodgerblue", breaks = 10) |
||
+ | text(hx_1$mids, hx_1$counts+2, label=c(hx_1$counts), pos = 3, col = "royalblue4") |
||
+ | text(0.1,200,expression("mean:")) |
||
+ | text(0.23,200,round(mean(table[,1]),4)) |
||
+ | axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) |
||
+ | |||
+ | hx_2 <- hist(table[,2], main = "Common GO classifications vs. # GO terms HHblits search\n(2 iterations - default)", xlab ="Percentage in common", xaxt = "n", ylim=c(0,200), xlim = c(0,1), col = "dodgerblue", breaks = 10) |
||
+ | text(hx_2$mids, hx_2$counts+2, label=c(hx_2$counts), pos = 3, col = "royalblue4") |
||
+ | text(0.1,200,expression("mean:")) |
||
+ | text(0.23,200,round(mean(table[,2]),4)) |
||
+ | axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | |||
+ | |||
+ | a <- read.table(file="hhblits_default.out_Evalues.txt") |
||
+ | |||
+ | png(file="hhblits_default.out_Evalues.png", width = 480, height = 400) |
||
+ | hist(a[,1], xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of E-values, HHblits search\n(2 iterations - default)", xlim=c(-200,0) ) |
||
+ | text(-100,80,round(mean(a[,1])),4) |
||
+ | text(-160,80,expression("mean:")) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | |||
+ | ip_def <- read.table(file="hhblits_default.out_ident_cov.tsv", header = T) |
||
+ | |||
+ | png(file="hhblits_default_Evalues.png", width = 480, height = 400) |
||
+ | hist(ip_def$Evalue, xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of E-values, HHblits search\n(2 iterations - default)", xlim=c(-200,0) ) |
||
+ | text(-130,80,round(mean(ip_def$Evalue),4)) |
||
+ | text(-160,80,expression("mean:")) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | png(file="hhblits_default_Identities.png", width = 480, height = 400) |
||
+ | hist(ip_def$Identities, xlab="Identity in %", breaks=30, col = "dodgerblue", ylim = c(0,40), main = "Histogramm of Identities, HHblits search\n(2 iterations - default)", xlim=c(0,100) ) |
||
+ | text(75,40,expression("mean:")) |
||
+ | text(90,40,round(mean(ip_def$Identities),4)) |
||
+ | dev.off() |
||
+ | |||
+ | png(file="hhblits_default_Similarity.png", width = 480, height = 400) |
||
+ | hist(ip_def$Similarity, xlab="Similarity", breaks=30, col = "dodgerblue", main = "Histogramm of Similarity, HHblits search\n(2 iterations - default)", ylim = c(0,40), xlim=c(0,2) ) |
||
+ | text(1.5,40,expression("mean:")) |
||
+ | text(1.8,40,round(mean(ip_def$Similarity),4)) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | # ####################################n8############################################## |
||
+ | table <- read.table(file="hhblits_n8_neu_ids_GOterms_comparison.txt") |
||
+ | |||
+ | png(file="hhblits_n8_neu_ids_GOterms_comparison.png", width = 480, height = 700) |
||
+ | layout(matrix(data=c(1,2), nrow=2, ncol=1), widths=c(1,1), heights=c(3,3)) |
||
+ | |||
+ | hx_1 <- hist(table[,1], main = "Common GO classifications vs. # GO terms P06280\n(8 iterations)", xlab ="Percentage in common", xaxt = "n", ylim=c(0,400), xlim = c(0,1), col = "dodgerblue", breaks = 10) |
||
+ | text(hx_1$mids, hx_1$counts+2, label=c(hx_1$counts), pos = 3, col = "royalblue4") |
||
+ | text(0.8,380,expression("mean:")) |
||
+ | text(0.93,380,round(mean(table[,1]),4)) |
||
+ | axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) |
||
+ | |||
+ | hx_2 <- hist(table[,2], main = "Common GO classifications vs. # GO terms HHblits search\n(8 iterations)", xlab ="Percentage in common", xaxt = "n", ylim=c(0,400), xlim = c(0,1), col = "dodgerblue", breaks = 10) |
||
+ | text(hx_2$mids, hx_2$counts+2, label=c(hx_2$counts), pos = 3, col = "royalblue4") |
||
+ | text(0.8,380,expression("mean:")) |
||
+ | text(0.93,380,round(mean(table[,2]),4)) |
||
+ | axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | |||
+ | |||
+ | |||
+ | ip_n8_neu <- read.table(file="hhblits_n8_neu.out_ident_cov.tsv", header = T) |
||
+ | |||
+ | png(file="hhblits_n8_neu_Evalues.png", width = 480, height = 400) |
||
+ | hist(ip_n8_neu$Evalue, xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,250), main = "Histogramm of E-values, HHblits search\n(8 iterations)", xlim=c(-200,0) ) |
||
+ | text(-130,250,round(mean(ip_n8_neu$Evalue),4)) |
||
+ | text(-160,250,expression("mean:")) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | png(file="hhblits_n8_neu_Identities.png", width = 480, height = 400) |
||
+ | hist(ip_n8_neu$Identities, xlab="Identity in %", breaks=30, col = "dodgerblue", ylim = c(0,300), main = "Histogramm of Identities, HHblits search\n(8 iterations)", xlim=c(0,100) ) |
||
+ | text(75,300,expression("mean:")) |
||
+ | text(90,300,round(mean(ip_n8_neu$Identities),4)) |
||
+ | dev.off() |
||
+ | |||
+ | png(file="hhblits_n8_neu_Similarity.png", width = 480, height = 400) |
||
+ | hist(ip_n8_neu$Similarity, xlab="Similarity", breaks=30, col = "dodgerblue", main = "Histogramm of Similarity, HHblits search\n(8 iterations)", ylim = c(0,150), xlim=c(0,2) ) |
||
+ | text(1.5,150,expression("mean:")) |
||
+ | text(1.8,150,round(mean(ip_n8_neu$Similarity),4)) |
||
+ | |||
+ | dev.off() |
||
=== Comparison === |
=== Comparison === |
Revision as of 08:39, 5 May 2012
Please see Task 2 for our line of action on this topic. The results are located at Task 2 results
Contents
Sequence searches
>compare_GO_terms.pl #!/usr/bin/perl use LWP::UserAgent; #reference..."OUR" protein our %reference_hash = (); $ua = LWP::UserAgent->new; $req = HTTP::Request->new(GET => "http://www.ebi.ac.uk/QuickGO/GAnnotation?protein=" . $ARGV[0] . "&format=tsv"); $res = $ua->request($req,'annotation.tsv'); open (FILE, '<annotation.tsv'); $head=<FILE>; while (<FILE>) { chomp; ($db,$id,$splice,$symbol,$taxon,$qualifier,$goID,$goName,$reference,$evidence,$with,$aspect,$date,$source) = split(/\t/); #make GO terms unique by writing them into hash $reference_hash{$goID} = ""; } #print "@{[ %reference_hash ]}\t" . "\n\n"; my $size_ref_hash = scalar(keys %reference_hash); #comparing to... $filename = $ARGV[1]; $filename =~ /(.*)\..*/; $outname = $1 . '_comparison.txt'; open(IDS,"<".$filename); open(OUT,">".$outname); while(<IDS>) { our %compare_hash = (); @array = split(/\s/); pop(@array); for ($i=1;$i<@array;$i++){ $compare_hash{$array[$i]} = ""; } #print "@{[ %compare_hash ]}\t". "\n"; my @common = grep exists $reference_hash{ $_ }, keys %compare_hash; my $size_common = scalar(@common); my $size_comp_hash = scalar(keys %compare_hash); print OUT $size_common / $size_ref_hash ."\t"; if ( $size_comp_hash > 0) { print OUT $size_common / $size_comp_hash ."\n"; } else { print OUT "0\n"; } }
>download-annotation.pl #!/usr/bin/perl use LWP::UserAgent; $filename = $ARGV[0]; $filename =~ /(.*)\..*/; $outname = $1 . '_GOterms.tsv'; open IDS, $filename or die $!; open (OUT, '>' . $outname); $result = ""; while(<IDS>){ chomp; $_ =~ s/\.//gi; $ua = LWP::UserAgent->new; $req = HTTP::Request->new(GET => "http://www.ebi.ac.uk/QuickGO/GAnnotation?protein=" . $_ . "&format=tsv"); $res = $ua->request($req,'annotation.tsv'); %hash = (); open (FILE, '<annotation.tsv'); print OUT "$_\t"; $head=<FILE>; while (<FILE>) { chomp; ($db,$id,$splice,$symbol,$taxon,$qualifier,$goID,$goName,$reference,$evidence,$with,$aspect,$date,$source) = split(/\t/); #make GO terms unique by writing them into hash $hash{$goID} = ""; } print OUT "@{[ %hash ]}\t"; print OUT "\n"; close (FILE); } close (OUT); close IDS;
Blast
> extract_ids_blast.pl #!/bin/perl my $Z = 0.003; my $filename = "blastsearch_default_v700.out"; my $outname = "blastsearch_default_v700_ids.txt"; my $outEval = "blastsearch_default_v700_Evalues.txt"; open IN, $filename or die $!; #open(OUT, ">" . $outname); open(OUT2, ">" . $outEval); while(<IN>) { if($_ =~ /^(tr|sp)/){ #print $_; chomp; $_ =~ /.+\|(.+)\|.+(.{5})/; $store_1 = $1; $store_2 = $2; $Evalue = $2; $Evalue =~ s/^\s+//; $Evalue =~ s/\s+$//; if( $Evalue > 0.003 ) {} else { # print OUT $store_1 . "\n"; if($Evalue eq "0.0"){ print OUT2 "-Inf\n"; } elsif($Evalue =~ /(.*)e(.+)/){ $Evalue =~ /(.*)e(.+)/; print OUT2 $2 ."\n"; } else { print OUT2 log($Evalue)/log(10) . "\n"; } } } }
> parse_blast.pl #!/usr/bin/perl use strict; use warnings; my $bool = 0; my $ID; my $Length; my $Evalue; my $Identities; my $Positives; open(IN, "< " .$ARGV[0]) or die $!; open(OUT, "> " .$ARGV[0] . "_ident_cov.tsv"); print OUT "ID\tLength\tEvalue\tIdentities\tPositives\n"; while (<IN>) { chomp; if($_ =~ /^>/) { $_ =~ />.+\|(.+)\|.+/; $ID = $1; $bool = 1; } elsif($_ =~ /^\s+Length/ && $bool == 1){ $_ =~ /\s+Length\ =\ (\d+)/; $Length = $1; } elsif ($_ =~ /^\s*Score\ =/ && $bool == 1){ $_ =~ /\s*Score\s+=\s+.+,\s+Expect\s+=\s+(.+?),\s+Method: .+/; $Evalue = $1; $Evalue =~ s/^\s+//; $Evalue =~ s/\s+$//; if($Evalue eq "0.0"){ $Evalue = "-Inf"; } elsif($Evalue =~ /(.*)e(.+)/){ $Evalue =~ /(.*)e(.+)/; $Evalue = $2; } else { $Evalue = log($Evalue)/log(10); } } elsif ($_ =~ /^\s*Identities/ && $bool == 1){ if($_ =~ /\s+Identities.+\((.+)\%\).+Positives.+\((.+)\%\).+Gaps.+\((.+)\%\)/){ $Identities = $1; $Positives = $2; } elsif($_ =~ /\s*Identities.+\((\d+)\%\).+Positives.+\((\d+)\%\).*/){ $Identities = $1; $Positives = $2; } if( $Evalue < -2.39794000867204 ) { print OUT "$ID\t$Length\t$Evalue\t$Identities\t$Positives\n"; $bool = 0; } } } close(IN);
>hist_blast.r table <- read.table(file="blastsearch_default_v700_ids_GOterms_comparison.txt") png(file="blastsearch_default_v700_ids_GOterms_comparison.png", width = 480, height = 700) layout(matrix(data=c(1,2), nrow=2, ncol=1), widths=c(1,1), heights=c(3,3)) hx_1 <- hist(table[,1], main = "Common GO classifications vs. # GO terms P06280", xlab ="Percentage in common", xaxt = "n", ylim=c(0,500), xlim = c(0,1), col = "dodgerblue", breaks = 10) text(hx_1$mids, hx_1$counts+2, label=c(hx_1$counts), pos = 3, col = "royalblue4") text(0.1,500,expression("mean:")) text(0.23,500,round(mean(table[,1]),4)) axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) hx_2 <- hist(table[,2], main = "Common GO classifications vs. # GO terms blast search", xlab ="Percentage in common", xaxt = "n", ylim=c(0,500), xlim = c(0,1), col = "dodgerblue", breaks = 10) text(hx_2$mids, hx_2$counts+2, label=c(hx_2$counts), pos = 3, col = "royalblue4") text(0.1,500,expression("mean:")) text(0.23,500,round(mean(table[,2]),4)) axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) dev.off() ip <- read.table(file="blastsearch_default_v700.out_ident_cov.tsv", header = T) png(file="blastsearch_default_v700_Evalues.png", width = 480, height = 400) hist(ip$Evalue, xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of E-values, blastsearch", xlim=c(-200,0) ) text(-130,80,round(mean(ip$Evalue[7:664]),4)) text(-160,80,expression("mean:")) dev.off() png(file="blastsearch_default_v700_Identities.png", width = 480, height = 400) hist(ip$Identities, xlab="Identity in %", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of Identities, blastsearch", xlim=c(0,100) ) text(75,80,expression("mean:")) text(90,80,round(mean(ip$Identities),4)) dev.off() png(file="blastsearch_default_v700_Positives.png", width = 480, height = 400) hist(ip$Positives, xlab="Positives in %", breaks=30, col = "dodgerblue", main = "Histogramm of Positives, blastsearch", xlim=c(0,100), ylim = c(0,80) ) text(75,80,expression("mean:")) text(90,80,round(mean(ip$Positives),4)) dev.off() png(file="blastsearch_default_v700_Lenghts.png", width = 480, height = 400) hist(ip$Length, xlab="Sequence length", breaks=30, col = "dodgerblue", ylim = c(0,250), xlim = c(0,2000), main = "Histogramm of sequence length, blastsearch" ) text(1000,180,expression("mean length:")) text(1500,180,round(mean(ip$Length),4)) text(1000,160,expression("P06280 length:")) text(1500,160,expression("429")) dev.off()
Psi-Blast
HHblits
>extract_evalues_hhblits.pl #!/bin/perl my $Z = 0.003; my $filename = $ARGV[0]; my $outEval = $ARGV[0] . "_Evalues.txt"; open IN, $filename or die $!; open(OUT2, ">" . $outEval); while(<IN>) { if($_ =~ /^\s*\d+\sUP20/){ # print $_; chomp; $_ =~ /.{41}(.{7}).+/; $Evalue = $1; #print $Evalue ."\t" . $_ ."\n"; $Evalue =~ s/^\s+//; $Evalue =~ s/\s+$//; if( $Evalue > 0.003 ) {} else { if($Evalue eq "0.0"){ print OUT2 "-Inf\n"; } elsif($Evalue =~ /(.*)E(.+)/){ $Evalue =~ /(.*)E(.+)/; print OUT2 $2 ."\n"; } else { print OUT2 log($Evalue)/log(10) . "\n"; } } } }
>parse_hhblits.pl #!/usr/bin/perl use strict; use warnings; my $bool = 0; my $ID; my $Evalue; my $Identities; my $Similarity; open(IN, "< " .$ARGV[0]) or die $!; open(OUT, "> " .$ARGV[0] . "_ident_cov.tsv"); print OUT "ID\tEvalue\tIdentities\tSimilarity\n"; while (<IN>) { chomp; if($_ =~ /^>/) { my @array = split('\|', $_); $array[4] =~ s/\.$//; my @array2 = split('\ ', $array[4]); $ID = $array2[0]; $bool = 1; } elsif ($_ =~ /^\s*Probab/ && $bool == 1){ $_ =~ /Probab=.+E-value=(.+)\s+Score=.+Aligned_cols=.+Identities=(\d+).+Similarity=(.+)\s+Sum_probs=.+/; $Evalue = $1; $Identities = $2; $Similarity = $3; $Evalue =~ s/^\s+//; $Evalue =~ s/\s+$//; if($Evalue eq "0.0"){ $Evalue = "-Inf"; } elsif($Evalue =~ /(.*)e(.+)/){ $Evalue =~ /(.*)e(.+)/; $Evalue = $2; } else { $Evalue = log($Evalue)/log(10); } if( $Evalue < -2.39794000867204 ) { print OUT "$ID\t$Evalue\t$Identities\t$Similarity\n"; $bool = 0; } } } close(IN);
> hist_hhblits.R table <- read.table(file="hhblits_default_ids_GOterms_comparison.txt") png(file="hhblits_default_ids_GOterms_comparison.png", width = 480, height = 700) layout(matrix(data=c(1,2), nrow=2, ncol=1), widths=c(1,1), heights=c(3,3)) hx_1 <- hist(table[,1], main = "Common GO classifications vs. # GO terms P06280\n(2 iterations - default)", xlab ="Percentage in common", xaxt = "n", ylim=c(0,200), xlim = c(0,1), col = "dodgerblue", breaks = 10) text(hx_1$mids, hx_1$counts+2, label=c(hx_1$counts), pos = 3, col = "royalblue4") text(0.1,200,expression("mean:")) text(0.23,200,round(mean(table[,1]),4)) axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) hx_2 <- hist(table[,2], main = "Common GO classifications vs. # GO terms HHblits search\n(2 iterations - default)", xlab ="Percentage in common", xaxt = "n", ylim=c(0,200), xlim = c(0,1), col = "dodgerblue", breaks = 10) text(hx_2$mids, hx_2$counts+2, label=c(hx_2$counts), pos = 3, col = "royalblue4") text(0.1,200,expression("mean:")) text(0.23,200,round(mean(table[,2]),4)) axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) dev.off() a <- read.table(file="hhblits_default.out_Evalues.txt") png(file="hhblits_default.out_Evalues.png", width = 480, height = 400) hist(a[,1], xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of E-values, HHblits search\n(2 iterations - default)", xlim=c(-200,0) ) text(-100,80,round(mean(a[,1])),4) text(-160,80,expression("mean:")) dev.off() ip_def <- read.table(file="hhblits_default.out_ident_cov.tsv", header = T) png(file="hhblits_default_Evalues.png", width = 480, height = 400) hist(ip_def$Evalue, xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of E-values, HHblits search\n(2 iterations - default)", xlim=c(-200,0) ) text(-130,80,round(mean(ip_def$Evalue),4)) text(-160,80,expression("mean:")) dev.off() png(file="hhblits_default_Identities.png", width = 480, height = 400) hist(ip_def$Identities, xlab="Identity in %", breaks=30, col = "dodgerblue", ylim = c(0,40), main = "Histogramm of Identities, HHblits search\n(2 iterations - default)", xlim=c(0,100) ) text(75,40,expression("mean:")) text(90,40,round(mean(ip_def$Identities),4)) dev.off() png(file="hhblits_default_Similarity.png", width = 480, height = 400) hist(ip_def$Similarity, xlab="Similarity", breaks=30, col = "dodgerblue", main = "Histogramm of Similarity, HHblits search\n(2 iterations - default)", ylim = c(0,40), xlim=c(0,2) ) text(1.5,40,expression("mean:")) text(1.8,40,round(mean(ip_def$Similarity),4)) dev.off() # ####################################n8############################################## table <- read.table(file="hhblits_n8_neu_ids_GOterms_comparison.txt") png(file="hhblits_n8_neu_ids_GOterms_comparison.png", width = 480, height = 700) layout(matrix(data=c(1,2), nrow=2, ncol=1), widths=c(1,1), heights=c(3,3)) hx_1 <- hist(table[,1], main = "Common GO classifications vs. # GO terms P06280\n(8 iterations)", xlab ="Percentage in common", xaxt = "n", ylim=c(0,400), xlim = c(0,1), col = "dodgerblue", breaks = 10) text(hx_1$mids, hx_1$counts+2, label=c(hx_1$counts), pos = 3, col = "royalblue4") text(0.8,380,expression("mean:")) text(0.93,380,round(mean(table[,1]),4)) axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) hx_2 <- hist(table[,2], main = "Common GO classifications vs. # GO terms HHblits search\n(8 iterations)", xlab ="Percentage in common", xaxt = "n", ylim=c(0,400), xlim = c(0,1), col = "dodgerblue", breaks = 10) text(hx_2$mids, hx_2$counts+2, label=c(hx_2$counts), pos = 3, col = "royalblue4") text(0.8,380,expression("mean:")) text(0.93,380,round(mean(table[,2]),4)) axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) dev.off() ip_n8_neu <- read.table(file="hhblits_n8_neu.out_ident_cov.tsv", header = T) png(file="hhblits_n8_neu_Evalues.png", width = 480, height = 400) hist(ip_n8_neu$Evalue, xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,250), main = "Histogramm of E-values, HHblits search\n(8 iterations)", xlim=c(-200,0) ) text(-130,250,round(mean(ip_n8_neu$Evalue),4)) text(-160,250,expression("mean:")) dev.off() png(file="hhblits_n8_neu_Identities.png", width = 480, height = 400) hist(ip_n8_neu$Identities, xlab="Identity in %", breaks=30, col = "dodgerblue", ylim = c(0,300), main = "Histogramm of Identities, HHblits search\n(8 iterations)", xlim=c(0,100) ) text(75,300,expression("mean:")) text(90,300,round(mean(ip_n8_neu$Identities),4)) dev.off() png(file="hhblits_n8_neu_Similarity.png", width = 480, height = 400) hist(ip_n8_neu$Similarity, xlab="Similarity", breaks=30, col = "dodgerblue", main = "Histogramm of Similarity, HHblits search\n(8 iterations)", ylim = c(0,150), xlim=c(0,2) ) text(1.5,150,expression("mean:")) text(1.8,150,round(mean(ip_n8_neu$Similarity),4)) dev.off()