Difference between revisions of "Fabry:Sequence alignments (sequence searches and multiple alignments)/Scripts"
From Bioinformatikpedia
Rackersederj (talk | contribs) (Created page with "Please see Task 2 for our line of action on this topic. The results are located at [[Fabry:Sequenc…") |
Rackersederj (talk | contribs) (→Sequence searches) |
||
Line 5: | Line 5: | ||
== Sequence searches == |
== Sequence searches == |
||
+ | |||
=== Blast === |
=== Blast === |
||
> extract_ids_blast.pl |
> extract_ids_blast.pl |
||
Line 44: | Line 45: | ||
} |
} |
||
} |
} |
||
+ | |||
− | |||
+ | |||
> parse_blast.pl |
> parse_blast.pl |
||
#!/usr/bin/perl |
#!/usr/bin/perl |
||
Line 105: | Line 107: | ||
close(IN); |
close(IN); |
||
+ | >hist_blast.r |
||
− | |||
+ | |||
+ | table <- read.table(file="blastsearch_default_v700_ids_GOterms_comparison.txt") |
||
+ | |||
+ | png(file="blastsearch_default_v700_ids_GOterms_comparison.png", width = 480, height = 700) |
||
+ | layout(matrix(data=c(1,2), nrow=2, ncol=1), widths=c(1,1), heights=c(3,3)) |
||
+ | |||
+ | hx_1 <- hist(table[,1], main = "Common GO classifications vs. # GO terms P06280", xlab ="Percentage in common", xaxt = "n", ylim=c(0,500), xlim = c(0,1), col = "dodgerblue", breaks = 10) |
||
+ | text(hx_1$mids, hx_1$counts+2, label=c(hx_1$counts), pos = 3, col = "royalblue4") |
||
+ | text(0.1,500,expression("mean:")) |
||
+ | text(0.23,500,round(mean(table[,1]),4)) |
||
+ | axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) |
||
+ | |||
+ | hx_2 <- hist(table[,2], main = "Common GO classifications vs. # GO terms blast search", xlab ="Percentage in common", xaxt = "n", ylim=c(0,500), xlim = c(0,1), col = "dodgerblue", breaks = 10) |
||
+ | text(hx_2$mids, hx_2$counts+2, label=c(hx_2$counts), pos = 3, col = "royalblue4") |
||
+ | text(0.1,500,expression("mean:")) |
||
+ | text(0.23,500,round(mean(table[,2]),4)) |
||
+ | axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | |||
+ | ip <- read.table(file="blastsearch_default_v700.out_ident_cov.tsv", header = T) |
||
+ | |||
+ | png(file="blastsearch_default_v700_Evalues.png", width = 480, height = 400) |
||
+ | hist(ip$Evalue, xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of E-values, blastsearch", xlim=c(-200,0) ) |
||
+ | text(-130,80,round(mean(ip$Evalue[7:664]),4)) |
||
+ | text(-160,80,expression("mean:")) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | png(file="blastsearch_default_v700_Identities.png", width = 480, height = 400) |
||
+ | hist(ip$Identities, xlab="Identity in %", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of Identities, blastsearch", xlim=c(0,100) ) |
||
+ | text(75,80,expression("mean:")) |
||
+ | text(90,80,round(mean(ip$Identities),4)) |
||
+ | dev.off() |
||
+ | |||
+ | png(file="blastsearch_default_v700_Positives.png", width = 480, height = 400) |
||
+ | hist(ip$Positives, xlab="Positives in %", breaks=30, col = "dodgerblue", main = "Histogramm of Positives, blastsearch", xlim=c(0,100), ylim = c(0,80) ) |
||
+ | text(75,80,expression("mean:")) |
||
+ | text(90,80,round(mean(ip$Positives),4)) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | |||
+ | png(file="blastsearch_default_v700_Lenghts.png", width = 480, height = 400) |
||
+ | hist(ip$Length, xlab="Sequence length", breaks=30, col = "dodgerblue", ylim = c(0,250), xlim = c(0,2000), main = "Histogramm of sequence length, blastsearch" ) |
||
+ | |||
+ | text(1000,180,expression("mean length:")) |
||
+ | text(1500,180,round(mean(ip$Length),4)) |
||
+ | text(1000,160,expression("P06280 length:")) |
||
+ | text(1500,160,expression("429")) |
||
+ | |||
+ | dev.off() |
||
+ | |||
+ | |||
=== Psi-Blast === |
=== Psi-Blast === |
||
Revision as of 08:34, 5 May 2012
Please see Task 2 for our line of action on this topic. The results are located at Task 2 results
Contents
Sequence searches
Blast
> extract_ids_blast.pl #!/bin/perl my $Z = 0.003; my $filename = "blastsearch_default_v700.out"; my $outname = "blastsearch_default_v700_ids.txt"; my $outEval = "blastsearch_default_v700_Evalues.txt"; open IN, $filename or die $!; #open(OUT, ">" . $outname); open(OUT2, ">" . $outEval); while(<IN>) { if($_ =~ /^(tr|sp)/){ #print $_; chomp; $_ =~ /.+\|(.+)\|.+(.{5})/; $store_1 = $1; $store_2 = $2; $Evalue = $2; $Evalue =~ s/^\s+//; $Evalue =~ s/\s+$//; if( $Evalue > 0.003 ) {} else { # print OUT $store_1 . "\n"; if($Evalue eq "0.0"){ print OUT2 "-Inf\n"; } elsif($Evalue =~ /(.*)e(.+)/){ $Evalue =~ /(.*)e(.+)/; print OUT2 $2 ."\n"; } else { print OUT2 log($Evalue)/log(10) . "\n"; } } } }
> parse_blast.pl #!/usr/bin/perl use strict; use warnings; my $bool = 0; my $ID; my $Length; my $Evalue; my $Identities; my $Positives; open(IN, "< " .$ARGV[0]) or die $!; open(OUT, "> " .$ARGV[0] . "_ident_cov.tsv"); print OUT "ID\tLength\tEvalue\tIdentities\tPositives\n"; while (<IN>) { chomp; if($_ =~ /^>/) { $_ =~ />.+\|(.+)\|.+/; $ID = $1; $bool = 1; } elsif($_ =~ /^\s+Length/ && $bool == 1){ $_ =~ /\s+Length\ =\ (\d+)/; $Length = $1; } elsif ($_ =~ /^\s*Score\ =/ && $bool == 1){ $_ =~ /\s*Score\s+=\s+.+,\s+Expect\s+=\s+(.+?),\s+Method: .+/; $Evalue = $1; $Evalue =~ s/^\s+//; $Evalue =~ s/\s+$//; if($Evalue eq "0.0"){ $Evalue = "-Inf"; } elsif($Evalue =~ /(.*)e(.+)/){ $Evalue =~ /(.*)e(.+)/; $Evalue = $2; } else { $Evalue = log($Evalue)/log(10); } } elsif ($_ =~ /^\s*Identities/ && $bool == 1){ if($_ =~ /\s+Identities.+\((.+)\%\).+Positives.+\((.+)\%\).+Gaps.+\((.+)\%\)/){ $Identities = $1; $Positives = $2; } elsif($_ =~ /\s*Identities.+\((\d+)\%\).+Positives.+\((\d+)\%\).*/){ $Identities = $1; $Positives = $2; } if( $Evalue < -2.39794000867204 ) { print OUT "$ID\t$Length\t$Evalue\t$Identities\t$Positives\n"; $bool = 0; } } } close(IN); >hist_blast.r table <- read.table(file="blastsearch_default_v700_ids_GOterms_comparison.txt") png(file="blastsearch_default_v700_ids_GOterms_comparison.png", width = 480, height = 700) layout(matrix(data=c(1,2), nrow=2, ncol=1), widths=c(1,1), heights=c(3,3)) hx_1 <- hist(table[,1], main = "Common GO classifications vs. # GO terms P06280", xlab ="Percentage in common", xaxt = "n", ylim=c(0,500), xlim = c(0,1), col = "dodgerblue", breaks = 10) text(hx_1$mids, hx_1$counts+2, label=c(hx_1$counts), pos = 3, col = "royalblue4") text(0.1,500,expression("mean:")) text(0.23,500,round(mean(table[,1]),4)) axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) hx_2 <- hist(table[,2], main = "Common GO classifications vs. # GO terms blast search", xlab ="Percentage in common", xaxt = "n", ylim=c(0,500), xlim = c(0,1), col = "dodgerblue", breaks = 10) text(hx_2$mids, hx_2$counts+2, label=c(hx_2$counts), pos = 3, col = "royalblue4") text(0.1,500,expression("mean:")) text(0.23,500,round(mean(table[,2]),4)) axis(side=1, at=seq(0,1, 0.1) ,labels = seq(0,100, 10)) dev.off() ip <- read.table(file="blastsearch_default_v700.out_ident_cov.tsv", header = T) png(file="blastsearch_default_v700_Evalues.png", width = 480, height = 400) hist(ip$Evalue, xlab="log_10(E-value)", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of E-values, blastsearch", xlim=c(-200,0) ) text(-130,80,round(mean(ip$Evalue[7:664]),4)) text(-160,80,expression("mean:")) dev.off() png(file="blastsearch_default_v700_Identities.png", width = 480, height = 400) hist(ip$Identities, xlab="Identity in %", breaks=30, col = "dodgerblue", ylim = c(0,80), main = "Histogramm of Identities, blastsearch", xlim=c(0,100) ) text(75,80,expression("mean:")) text(90,80,round(mean(ip$Identities),4)) dev.off() png(file="blastsearch_default_v700_Positives.png", width = 480, height = 400) hist(ip$Positives, xlab="Positives in %", breaks=30, col = "dodgerblue", main = "Histogramm of Positives, blastsearch", xlim=c(0,100), ylim = c(0,80) ) text(75,80,expression("mean:")) text(90,80,round(mean(ip$Positives),4)) dev.off() png(file="blastsearch_default_v700_Lenghts.png", width = 480, height = 400) hist(ip$Length, xlab="Sequence length", breaks=30, col = "dodgerblue", ylim = c(0,250), xlim = c(0,2000), main = "Histogramm of sequence length, blastsearch" ) text(1000,180,expression("mean length:")) text(1500,180,round(mean(ip$Length),4)) text(1000,160,expression("P06280 length:")) text(1500,160,expression("429")) dev.off()