Fabry:Sequence alignments (sequence searches and multiple alignments)/Scripts
From Bioinformatikpedia
Revision as of 08:33, 5 May 2012 by Rackersederj (talk | contribs) (Created page with "Please see Task 2 for our line of action on this topic. The results are located at [[Fabry:Sequenc…")
Please see Task 2 for our line of action on this topic. The results are located at Task 2 results
Contents
Sequence searches
Blast
> extract_ids_blast.pl #!/bin/perl my $Z = 0.003; my $filename = "blastsearch_default_v700.out"; my $outname = "blastsearch_default_v700_ids.txt"; my $outEval = "blastsearch_default_v700_Evalues.txt"; open IN, $filename or die $!; #open(OUT, ">" . $outname); open(OUT2, ">" . $outEval); while(<IN>) { if($_ =~ /^(tr|sp)/){ #print $_; chomp; $_ =~ /.+\|(.+)\|.+(.{5})/; $store_1 = $1; $store_2 = $2; $Evalue = $2; $Evalue =~ s/^\s+//; $Evalue =~ s/\s+$//; if( $Evalue > 0.003 ) {} else { # print OUT $store_1 . "\n"; if($Evalue eq "0.0"){ print OUT2 "-Inf\n"; } elsif($Evalue =~ /(.*)e(.+)/){ $Evalue =~ /(.*)e(.+)/; print OUT2 $2 ."\n"; } else { print OUT2 log($Evalue)/log(10) . "\n"; } } } } > parse_blast.pl #!/usr/bin/perl use strict; use warnings; my $bool = 0; my $ID; my $Length; my $Evalue; my $Identities; my $Positives; open(IN, "< " .$ARGV[0]) or die $!; open(OUT, "> " .$ARGV[0] . "_ident_cov.tsv"); print OUT "ID\tLength\tEvalue\tIdentities\tPositives\n"; while (<IN>) { chomp; if($_ =~ /^>/) { $_ =~ />.+\|(.+)\|.+/; $ID = $1; $bool = 1; } elsif($_ =~ /^\s+Length/ && $bool == 1){ $_ =~ /\s+Length\ =\ (\d+)/; $Length = $1; } elsif ($_ =~ /^\s*Score\ =/ && $bool == 1){ $_ =~ /\s*Score\s+=\s+.+,\s+Expect\s+=\s+(.+?),\s+Method: .+/; $Evalue = $1; $Evalue =~ s/^\s+//; $Evalue =~ s/\s+$//; if($Evalue eq "0.0"){ $Evalue = "-Inf"; } elsif($Evalue =~ /(.*)e(.+)/){ $Evalue =~ /(.*)e(.+)/; $Evalue = $2; } else { $Evalue = log($Evalue)/log(10); } } elsif ($_ =~ /^\s*Identities/ && $bool == 1){ if($_ =~ /\s+Identities.+\((.+)\%\).+Positives.+\((.+)\%\).+Gaps.+\((.+)\%\)/){ $Identities = $1; $Positives = $2; } elsif($_ =~ /\s*Identities.+\((\d+)\%\).+Positives.+\((\d+)\%\).*/){ $Identities = $1; $Positives = $2; } if( $Evalue < -2.39794000867204 ) { print OUT "$ID\t$Length\t$Evalue\t$Identities\t$Positives\n"; $bool = 0; } } } close(IN);