Difference between revisions of "Run.pl"
From Bioinformatikpedia
Kalemanovm (talk | contribs) |
Kalemanovm (talk | contribs) |
||
Line 16: | Line 16: | ||
Code: |
Code: |
||
<source lang="perl"> |
<source lang="perl"> |
||
+ | #!/usr/bin/perl -w |
||
+ | ################################################################################################################################################# |
||
+ | # run.pl |
||
+ | # Performs a BLAST/PSI-BLAST/HHblits run with custom parameters on biolab computers. |
||
+ | # @autor: Maria Kalemanov |
||
+ | #example usages: ( from directory /mnt/home/student/kalemanovm/master_practical/Assignment2_Alignments/output/task1, but please do not try to execute it from there, use you own directories;) ) |
||
+ | |||
+ | #BLAST: |
||
+ | #../../scripts/task1/run.pl --in ../../gaucher_data/P04062.fasta --out_b blast-big_80 |
||
+ | #../scripts/task1/run.pl --in ../../gaucher_data/P04062.fasta --out_b blast-big -db_b big |
||
+ | |||
+ | #Psi-BLAST: |
||
+ | #../../scripts/task1/run.pl --in ../../gaucher_data/P04062.fasta --out_p psiblast-2iter-big_80 --c psiblast-1iter-big_80.chk |
||
+ | #../../scripts/task1/run.pl --in ../../gaucher_data/P04062.fasta --out_p psiblast-1iter-big_80--2iter_big --db_b big --r psiblast-1iter-big_80.chk --c psiblast-1iter-big_80--1iter_big.chk |
||
+ | #../../scripts/task1/run.pl --in ../../gaucher_data/P04062.fasta --out_p psiblast-1iter-big_80--1iter_big--1iter_pdb --db_b pdb --iter_p 1 --r psiblast-1iter-big_80--1iter_big.chk |
||
+ | |||
+ | #HHblits: |
||
+ | #../../scripts/task1/run.pl --in ../../gaucher_data/P04062.fasta --out_h hhblits-2iter-uniprot20 |
||
+ | #../../scripts/task1/run.pl --in hhblits-2iter-uniprot20.oa3m --out_h hhblits-2iter-uniprot20--1iter-pdb --db_h pdb --iter_h 1 |
||
+ | ################################################################################################################################################# |
||
+ | use strict; |
||
+ | use File::chdir; |
||
+ | use Getopt::Long; |
||
+ | |||
+ | |||
+ | # Parse command line parameters |
||
+ | our($in, $out_b, $out_p, $out_h, $db_b, $db_h, $iter_p, $iter_h, $eval_p, $eval_h, $c, $r, $h); |
||
+ | |||
+ | my $args_ok = GetOptions('in=s' => \$in, #full path of the input file with a protein sequence |
||
+ | 'out_b=s' => \$out_b, #path where the output file of BLAST should be saved |
||
+ | 'out_p=s' => \$out_p, #path where the output file of Psi-BLAST should be saved |
||
+ | 'out_h=s' => \$out_h, #path where the output file of HHblits should be saved |
||
+ | #optional parameters: |
||
+ | 'db_b=s' => \$db_b, #BLAST/Psi-BLAST DB, e.g. big_80 or big |
||
+ | 'db_h=s' => \$db_h, #HHblits DB, e.g. uniprot20 |
||
+ | 'iter_p=i' => \$iter_p, #Psi-BLAST number of iterations |
||
+ | 'iter_h=i' => \$iter_h, #HHblits number of iterations |
||
+ | 'eval_p=s' => \$eval_p, #E-Value cutoff for Psi-BLAST |
||
+ | 'eval_h=s' => \$eval_h, #E-Value cutoff for HHblits |
||
+ | 'c=s' => \$c, #Psi-BLAST: create a checkpoint PSSM file to use in further searches |
||
+ | 'r=s' => \$r, #Psi-BLAST: reuse a created checkpoint file |
||
+ | 'h' => \$h #print usage |
||
+ | ); |
||
+ | |||
+ | |||
+ | sub print_usage { |
||
+ | print "Usage: perl run.pl --in <input sequence file> --out_b <output blast file> --out_p <output psiblast file> --out_h <output hhblits file> [options]\n" . |
||
+ | "--db_b <database BLAST/Psi-BLAST>\tdefault=big_80\n" . |
||
+ | "--db_h <database HHblits>\tdefault=uniprot20\n" . |
||
+ | "--iter_p <number of iterations Psi-BLAST>\tdefault=2 (in this script)\n" . |
||
+ | "--iter_h <number of iterations HHblits,[1,8]>\tdefault=2\n" . |
||
+ | "--eval_p <E-Value cutoff for Psi-BLAST, [0,1]>\tdefault=0.002\n". |
||
+ | "--eval_h <E-Value cutoff for HHblits, [0,1]>\tdefault=0.001\n". |
||
+ | "--c <checkpoins Psi-BLAST file name>\tckeckpoint PSSM file should be created to search in other iterations (in other database)\n" . |
||
+ | "--r <checkpoins Psi-BLAST file name>\tckeckpoint PSSM file to be reused to search in other iterations (in other database)\n" . |
||
+ | "--h prints this usage\n"; |
||
+ | } |
||
+ | |||
+ | sub print_err { |
||
+ | print "Failed to execute run.pl\n"; |
||
+ | print_usage(); |
||
+ | exit; |
||
+ | } |
||
+ | |||
+ | |||
+ | if ($h){ |
||
+ | print_usage(); |
||
+ | exit; |
||
+ | } |
||
+ | if (!$in || !($out_b || $out_p || $out_h)){ |
||
+ | print_err(); |
||
+ | } |
||
+ | |||
+ | |||
+ | #Databases: |
||
+ | #for (Psi-)BLAST: |
||
+ | my $big_80 = "/mnt/project/rost_db/data/big/big_80"; |
||
+ | my $big = "/mnt/project/rost_db/data/big/big"; |
||
+ | my $pdb_seqres = "/mnt/home/rost/kloppmann/data/blast_db/pdb_seqres"; |
||
+ | #for HHblits: |
||
+ | my $uniprot20 = "/mnt/project/rost_db/data/hhblits/uniprot20_02Sep11"; |
||
+ | my $pdb_full = "/mnt/project/rost_db/data/hhblits/pdb_full"; |
||
+ | |||
+ | |||
+ | #Set $db_b from short databases names: |
||
+ | if ($db_b){ |
||
+ | if ($db_b eq "big_80"){ |
||
+ | $db_b = $big_80; |
||
+ | }elsif ($db_b eq "big"){ |
||
+ | $db_b = $big; |
||
+ | }elsif ($db_b eq "pdb" || $db_b eq "pdb_seqres"){ |
||
+ | $db_b = $pdb_seqres; |
||
+ | } |
||
+ | } |
||
+ | if ($db_h){ |
||
+ | if ($db_h eq "uniprot" || $db_h eq "uniprot20"){ |
||
+ | $db_h = $uniprot20; |
||
+ | }elsif ($db_h eq "pdb" || $db_h eq "pdb_full"){ |
||
+ | $db_h = $pdb_full; |
||
+ | } |
||
+ | } |
||
+ | |||
+ | |||
+ | #Set default parameters: |
||
+ | my $num_shown_hits = 10000; |
||
+ | if (!$db_b){ |
||
+ | $db_b = $big_80; |
||
+ | } |
||
+ | if (!$db_h){ |
||
+ | $db_h = $uniprot20; |
||
+ | } |
||
+ | if (!$iter_p){ |
||
+ | $iter_p = 2; |
||
+ | } |
||
+ | if (!$iter_h){ |
||
+ | $iter_h = 2; |
||
+ | } |
||
+ | if (!$eval_p){ |
||
+ | $eval_p = 0.002; |
||
+ | } |
||
+ | if (!$eval_h){ |
||
+ | $eval_h = 0.001; |
||
+ | } |
||
+ | |||
+ | #Other variables: |
||
+ | my $pssm_p = ""; |
||
+ | if ($out_p){ |
||
+ | $pssm_p = $out_p . "_pssm"; |
||
+ | } |
||
+ | my $pssm_h = ""; |
||
+ | my $hhm = ""; |
||
+ | if ($out_h){ |
||
+ | $pssm_h = $out_h . ".a3m"; |
||
+ | $hhm = $out_h . ".hhm"; |
||
+ | $out_h = $out_h . ".hhr"; |
||
+ | } |
||
+ | my $cmd_b; #command line parameters for BLAST |
||
+ | my $cmd_p; #command line parameters for Psi-BLAST |
||
+ | my $cmd_h; #command line parameters for HHblits |
||
+ | |||
+ | |||
+ | #Run BLAST: |
||
+ | if ($in && $out_b){ |
||
+ | $cmd_b = "time blastall -p blastp -i $in -d $db_b -v $num_shown_hits -b $num_shown_hits -o $out_b"; |
||
+ | system($cmd_b) && print "Failed to execute $cmd_b"; |
||
+ | } |
||
+ | #if ($in && $out_b){ |
||
+ | # $cmd_b = "time blastpgp -i $in -d $db_b -j 1 -v $num_shown_hits -b $num_shown_hits -o $out_b"; |
||
+ | # system($cmd_b) && print "Failed to execute $cmd_b"; |
||
+ | #} |
||
+ | |||
+ | |||
+ | # Run Psi-BLAST: |
||
+ | if ($in && $out_p){ |
||
+ | if($c){ |
||
+ | $cmd_p = "time blastpgp -i $in -d $db_b -j $iter_p -h $eval_p -v $num_shown_hits -b $num_shown_hits -o $out_p -Q $pssm_p -C $c"; |
||
+ | system($cmd_p) && print "Failed to execute $cmd_p"; |
||
+ | }elsif($r){ |
||
+ | $cmd_p = "time blastpgp -i $in -d $db_b -j $iter_p -h $eval_p -v $num_shown_hits -b $num_shown_hits -o $out_p -Q $pssm_p -R $r -t 1"; |
||
+ | system($cmd_p) && print "Failed to execute $cmd_p"; |
||
+ | }elsif($r && $c){ |
||
+ | $cmd_p = "time blastpgp -i $in -d $db_b -j $iter_p -h $eval_p -v $num_shown_hits -b $num_shown_hits -o $out_p -Q $pssm_p -R $r -t 1 -C $c"; |
||
+ | system($cmd_p) && print "Failed to execute $cmd_p"; |
||
+ | }else{ #normal search without a checkpoint file |
||
+ | $cmd_p = "time blastpgp -i $in -d $db_b -j $iter_p -h $eval_p -v $num_shown_hits -b $num_shown_hits -o $out_p -Q $pssm_p"; |
||
+ | system($cmd_p) && print "Failed to execute $cmd_p"; |
||
+ | } |
||
+ | } |
||
+ | |||
+ | |||
+ | # Run Hhblits: |
||
+ | if ($in && $out_h){ |
||
+ | $cmd_h = "time hhblits -i $in -d $db_h -n $iter_h -e $eval_h -Z $num_shown_hits -B $num_shown_hits -o $out_h -oa3m $pssm_h -ohhm $hhm"; |
||
+ | system($cmd_h) && print "Failed to execute $cmd_h"; |
||
+ | } |
||
</source> |
</source> |
Revision as of 12:12, 20 May 2013
Performs a BLAST/PSI-BLAST/HHblits run with custom parameters on biolab computers.
You can find the script run.pl
here on biocluster: /mnt/home/student/kalemanovm/master_practical/Assignment2_Alignments/scripts/task1
. The usage of the script with examples can also be found in the header of the file itself.
Usage: perl run.pl --in <input sequence file> --out_b <output blast file> --out_p <output psiblast file> --out_h <output hhblits file> [options]
--db_b <database BLAST/Psi-BLAST> default=big_80
--db_h <database HHblits> default=uniprot20
--iter_p <number of iterations Psi-BLAST> default=2 (in this script)
--iter_h <number of iterations HHblits,[1,8]> default=2
--eval_p <E-Value cutoff for Psi-BLAST, [0,1]> default=0.002
--eval_h <E-Value cutoff for HHblits, [0,1]> default=0.001
--c <checkpoins Psi-BLAST file name> ckeckpoint PSSM file should be created to search in other iterations (in other database)
--r <checkpoins Psi-BLAST file name> ckeckpoint PSSM file to be reused to search in other iterations (in other database)
--h prints this usage
Code: <source lang="perl">
- !/usr/bin/perl -w