Fabry:Sequence alignments (sequence searches and multiple alignments)/Scripts

From Bioinformatikpedia
Revision as of 08:33, 5 May 2012 by Rackersederj (talk | contribs) (Created page with "Please see Task 2 for our line of action on this topic. The results are located at [[Fabry:Sequenc…")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Please see Task 2 for our line of action on this topic. The results are located at Task 2 results


Sequence searches

Blast

> extract_ids_blast.pl
#!/bin/perl

my $Z = 0.003;
my $filename	= "blastsearch_default_v700.out";
my $outname	= "blastsearch_default_v700_ids.txt";
my $outEval	= "blastsearch_default_v700_Evalues.txt";
open IN, $filename or die $!;
#open(OUT, ">" . $outname);
open(OUT2, ">" . $outEval);

while(<IN>) {
	if($_ =~ /^(tr|sp)/){
		#print $_;
		chomp;
		$_ =~ /.+\|(.+)\|.+(.{5})/;
		$store_1 = $1;
		$store_2 = $2;
		$Evalue = $2;		
		$Evalue =~ s/^\s+//;
		$Evalue =~ s/\s+$//;

		if( $Evalue > 0.003 ) {}
		else {
#			print OUT  $store_1 . "\n";
			if($Evalue eq  "0.0"){
				print OUT2 "-Inf\n";
			}
			elsif($Evalue =~ /(.*)e(.+)/){
				$Evalue =~ /(.*)e(.+)/;
				print OUT2 $2 ."\n";
			}
			else {
				print OUT2 log($Evalue)/log(10) . "\n";
			}
		}
	}
}

> parse_blast.pl
#!/usr/bin/perl

use strict;
use warnings;

my $bool = 0;
my $ID;
my $Length;
my $Evalue;
my $Identities;
my $Positives;

open(IN, "< " .$ARGV[0]) or die $!;
open(OUT, "> " .$ARGV[0] . "_ident_cov.tsv");
print OUT "ID\tLength\tEvalue\tIdentities\tPositives\n";
while (<IN>) {
	chomp;
	if($_ =~ /^>/) {
		$_ =~ />.+\|(.+)\|.+/;
		$ID = $1;
		$bool = 1;
	}
	elsif($_ =~ /^\s+Length/ && $bool == 1){
		$_ =~ /\s+Length\ =\ (\d+)/;
		$Length = $1;
	}
	elsif ($_ =~ /^\s*Score\ =/ && $bool == 1){
		$_ =~ /\s*Score\s+=\s+.+,\s+Expect\s+=\s+(.+?),\s+Method: .+/;
		$Evalue = $1;		
		$Evalue =~ s/^\s+//;
		$Evalue =~ s/\s+$//;
		if($Evalue eq  "0.0"){
			$Evalue = "-Inf";
		}
		elsif($Evalue =~ /(.*)e(.+)/){
			$Evalue =~ /(.*)e(.+)/;
			$Evalue = $2;
		}
		else {
			$Evalue = log($Evalue)/log(10);
		}
	}
	elsif ($_ =~ /^\s*Identities/ && $bool == 1){
		if($_ =~ /\s+Identities.+\((.+)\%\).+Positives.+\((.+)\%\).+Gaps.+\((.+)\%\)/){
		$Identities = $1;
		$Positives = $2;
		}
		elsif($_ =~ /\s*Identities.+\((\d+)\%\).+Positives.+\((\d+)\%\).*/){
		$Identities = $1;
		$Positives = $2;	
		}
		if( $Evalue < -2.39794000867204 ) {
			print OUT "$ID\t$Length\t$Evalue\t$Identities\t$Positives\n";
			$bool = 0;
		}	
	}
}
close(IN);
 

Psi-Blast

HHblits

Comparison

Comparing the hits

Comparing the Evalues