Collection of scripts
From Bioinformatikpedia
A completely unordered, yet to become truly usefull collection of scripts we used.
PSIBlasttrainer.pl
#! /usr/bin/perl use strict; use Getopt::Long; my $input; my $output; my $database; my $maxIt; my $before; my $after; my $result = GetOptions ("i=s" => \$input,"o=s" => \$output,"d=s" => \$database,"j=i" => \$maxIt); open(TIME ,">","time.txt"); print TIME "\# time used for psiblast\n"; #run psiblast each time and save the time; for(my $i =1;$i<=$maxIt;$i++){ print "iteration: $i\n"; $before = time(); open STATUS, "-|","time blastpgp -j $i -d $database -i $input -o $output"."_".$i."_it" or die "can't fork: $!"; while(<STATUS>){ print TIME $_; } $after = time(); print TIME ($after-$before)."\n"; close STATUS; } print TIME ">"; for(my $i=1;$i<$maxIt;$i++){ my $eval = 1/(10**$i); print "iteration: $i\n"; $before = time(); system("time blastpgp -j 1 -d $database -i $input -o $output" . "_eVal_" . "$eval -e $eval"); $after = time(); print TIME ($after-$before)."\n"; close STATUS; } print TIME ">\n" for(my $i=1;$i<$maxIt;$i++){ my $eval = 1/(10**$i); print "iteration: $i\n"; $before = time(); system("time blastpgp -j 1 -d $database -i $input -o $output" . "_integr_" . "$eval -h $eval"); $after = time(); print TIME ($after-$before)."\n"; close STATUS; } close TIME;
Sequence Alignments
#!/usr/bin/env python #simple sript to count gaps and conserved columns in multiple alignment files import sys from collections import defaultdict def default_string(): return "" def aln_stats(filename): fh = open(filename,'r') sequences = defaultdict(default_string) for line in fh: if line[:2].strip() and not line.startswith("CLUSTAL"): seq = line.split() sequences[seq[0]]+="".join(seq[1:]) fh.close() for key in sequences: print key, sequences[key].count('-') print "conserved:",conserved(sequences) def fasta_stats(filename): fh = open(filename,'r') gap_count = 0 seq_id = "" sequences = defaultdict(default_string) for line in fh: if line.startswith(">"): if gap_count: print seq_id, gap_count gap_count = 0 seq_id = line.split()[0][1:] else: gap_count+=line.count('-') sequences[seq_id]+=line.strip() print seq_id, gap_count,"\nconserved:",conserved(sequences) fh.close() def conserved(sequence_dict): conserved_count = 0 for i in range(len(sequence_dict.values()[0])): col = set([]) for seq in sequence_dict.values(): col.add(seq[i]) if len(col)== 1: conserved_count+=1 return conserved_count if "--help" in sys.argv or len(sys.argv)<2: print "usage: python %s <filename>"%sys.argv[0] elif sys.argv[1].endswith(".aln"): aln_stats(sys.argv[1]) elif sys.argv[1].endswith(".fasta"): fasta_stats(sys.argv[1]) else: print "Only .aln or .fasta files, sorry!"