Difference between revisions of "Sequence Alignments Protocol TSD"
From Bioinformatikpedia
(→Unique identifiers for Blast and HHblits) |
(→Unique identifiers for Blast and HHblits) |
||
Line 52: | Line 52: | ||
cut -f 2 psiblastBIGit2e002.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > psiblastBIGit2e002.finpdb |
cut -f 2 psiblastBIGit2e002.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > psiblastBIGit2e002.finpdb |
||
cut -f 2 psiblastBIGit2e-10.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > psiblastBIGit2e-10.finpdb |
cut -f 2 psiblastBIGit2e-10.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > psiblastBIGit2e-10.finpdb |
||
− | |||
cut -f 2 hhblits__460.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > hhbtemp |
cut -f 2 hhblits__460.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > hhbtemp |
||
sed "s/_\w//g" hhblits_pdb_1500.pdbmapping > hhbtemp2 |
sed "s/_\w//g" hhblits_pdb_1500.pdbmapping > hhbtemp2 |
Revision as of 15:14, 5 May 2012
Contents
Blast
blastall -p blastp -d /mnt/project/pracstrucfunc12/data/big/big_80 -i /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/P06865.fasta -b 1200 -v 1200 > /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/blastall_1200alis.out
PSI-Blast
Big80
time blastpgp -C blastpgp1200_pssmdefault -d /mnt/project/pracstrucfunc12/data/big/big_80 -i /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/P06865.fasta -v 1200 -b 1200 > /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/blastpgp1200_default.out
time blastpgp -C blastpgp1200_pssmit2e002 -d /mnt/project/pracstrucfunc12/data/big/big_80 -i /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/P06865.fasta -j 2 -h "0.002" -v 1200 -b 1200 > /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/blastpgp1200_it2e002.out
time blastpgp -C blastpgp1200_pssmit10e002 -d /mnt/project/pracstrucfunc12/data/big/big_80 -i /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/P06865.fasta -j 10 -h "0.002" -v 1200 -b 1200 > /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/blastpgp1200_it10e002.out
time blastpgp -C blastpgp1200_pssmit10e-10 -d /mnt/project/pracstrucfunc12/data/big/big_80 -i /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/P06865.fasta -j 10 -h "10E-10" -v 1200 -b 1200 > /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/blastpgp1200_it10e-10.out
time blastpgp -C blastpgp1200_pssmit2e-10 -d /mnt/project/pracstrucfunc12/data/big/big_80 -i /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/P06865.fasta -j 2 -h "10E-10" -v 1200 -b 1200 > /mnt/home/student/meiera/1_SeqAli/1_SeqSearch/blastpgp1200_it2e-10.out
Big
PAT="/mnt/home/student/meiera/1_SeqAli/1_SeqSearch"
time blastpgp -R $PAT/blastpgp1200_pssmit2e002 -d /mnt/project/pracstrucfunc12/data/big/big -i $PAT/P06865.fasta -v 3800 -b 3800 > $PAT/blastpgpBIG_3800_it2e002.out
time blastpgp -R $PAT/blastpgp1200_pssmit10e002 -d /mnt/project/pracstrucfunc12/data/big/big -i $PAT/P06865.fasta -v 3800 -b 3800 > $PAT/blastpgpBIG_3800_it10e002.out
time blastpgp -R $PAT/blastpgp1200_pssmit2e-10 -d /mnt/project/pracstrucfunc12/data/big/big -i $PAT/P06865.fasta -v 3800 -b 3800 > $PAT/blastpgpBIG_3800_it2e-10.out
time blastpgp -R $PAT/blastpgp1200_pssmit10e-10 -d /mnt/project/pracstrucfunc12/data/big/big -i $PAT/P06865.fasta -v 3800 -b 3800 > $PAT/blastpgpBIG_3800_it10e-10.out
Find Unique matches: example
head -1 m8blastpgp1200_it2e002.out
grep -n G1RUL9 m8blastpgp1200_it2e002.out
tail -n +1233 m8blastpgp1200_it2e002.out | cut -f 2 | wc -l
tail -n +1233 m8blastpgp1200_it2e002.out | uniq -w 44 | wc -l
HHblits
WD="/mnt/home/student/meiera/1_SeqAli/1_SeqSearch"
time hhblits -i $WD/P06865.fasta -o $WD/hhblits_460_P06865.hhr -d /mnt/project/pracstrucfunc12/data/hhblits/uniprot20_current -Z 460 -B 460 > $WD/hhblits_460_P06865_stdout.log
time hhblits -i $WD/P06865.fasta -n 10 -o $WD/hhblits_460_P06865_10it.hhr -d /mnt/project/pracstrucfunc12/data/hhblits/uniprot20_current -Z 460 -B 460 > $WD/hhblits_460_P06865_10_stdout.log
time hhblits -i $WD/P06865.fasta -n 10 -o $WD/hhblits_2500_P06865_10it.hhr -d /mnt/project/pracstrucfunc12/data/hhblits/uniprot20_current -Z 2500 -B 2500 > $WD/hhblits_2500_P06865_10_stdout.log
time hhblits -i $WD/P06865.fasta -n 2 -o $WD/hhblits_460_P06865_2it.hhr -d /mnt/project/pracstrucfunc12/data/hhblits/uniprot20_current -Z 460 -B 460 > $WD/hhblits_460_P06865_2_stdout.log
time hhblits -i $WD/P06865.fasta -n 2 -o $WD/hhblits_2500_P06865_2it.hhr -d /mnt/project/pracstrucfunc12/data/hhblits/uniprot20_current -Z 2500 -B 2500 > $WD/hhblits_2500_P06865_2_stdout.log
time hhsearch -i $WD/P06865.fasta -o $WD/hhblits_460_P06865.hhr -d /mnt/project/pracstrucfunc12/data/hhblits/pdb70_current_hhm_db -Z 14 -B 14 > $WD/hhblits_pdb_1500_P06865_stdout.log
Preparation of outputs
Sequence identity and e-values
#for both Blast and HHblits output
/mnt/home/student/meiera/bin/1_parseidentity.pl
Uniprot sets for Venn diagrams
#for both Blast and HHblits output, also computation of the unique ids for HHblits
/mnt/home/student/meiera/bin/1_getUniprotIDS.pl
Build PDB sets
Unique identifiers for Blast and HHblits
/mnt/home/student/meiera/bin/1_forPdbMapping.pl #Use uniprot.org's online mapping service on the .pdbmapping files and save the results to .mapped files
cut -f 2 psiblastBIGit10e002.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > psiblastBIGit10e002.finpdb
cut -f 2 psiblastBIGit10e-10.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > psiblastBIGit10e-10.finpdb
cut -f 2 psiblastBIGit2e002.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > psiblastBIGit2e002.finpdb
cut -f 2 psiblastBIGit2e-10.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > psiblastBIGit2e-10.finpdb
cut -f 2 hhblits__460.mapped | tr '[A-Z]' '[a-z]' | sort | uniq | grep -P "\w{4}" > hhbtemp
sed "s/_\w//g" hhblits_pdb_1500.pdbmapping > hhbtemp2
cat hhbtemp hhbtemp2 | sort | uniq > hhblits.finpdb
rm -f hhbtemp hhbtemp2