Difference between revisions of "ARSA search protocol"

From Bioinformatikpedia
m (Created page with " # Run blast: blastall -p blastp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -i ARSA.fas > ARSA.blast # find unique matches cut -f 2 ARSA.blast | uniq >…")
 
m
Line 1: Line 1:
  +
== Blast ==
# Run blast:
 
  +
# Run blast:
blastall -p blastp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -i ARSA.fas > ARSA.blast
 
  +
blastall -p blastp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -i ARSA.fas > ARSA.blast
   
 
# find unique matches
 
# find unique matches
Line 26: Line 27:
 
cut -f 2 ARSA.blast.uniq.worseE10 > ARSA.blast.uniq.worseE10.matchIDs
 
cut -f 2 ARSA.blast.uniq.worseE10 > ARSA.blast.uniq.worseE10.matchIDs
 
cut -d "|" -f 2 ARSA.blast.uniq.worseE10.matchIDs > ARSA.blast.uniq.worseE10.matchIDs.clean
 
cut -d "|" -f 2 ARSA.blast.uniq.worseE10.matchIDs > ARSA.blast.uniq.worseE10.matchIDs.clean
  +
  +
== PSI-Blast ==
  +
  +
# run PSI-Blast with different combinations of parameters
  +
> blastpgp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -j 2 -i ARSA.fas -o ARSA.psiBlast.j2.hDefault
  +
280.470u 30.420s 5:55.56 87.4% 0+0k 18005712+1880io 792pf+0w
  +
> blastpgp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -j 10 -i ARSA.fas -o ARSA.psiBlast.j10.hDefault
  +
[blastpgp] ERROR: ncbiapi [000.000] ObjMgrNextAvailEntityID failed with idx 2048
  +
2111.750u 44.740s 36:55.25 97.3% 0+0k 12951072+11856io 1271pf+0w
  +
> blastpgp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -j 2 -h 1e-10 -i ARSA.fas -o ARSA.psiBlast.j2.h1e-10
  +
> blastpgp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -j 10 -h 1e-10 -i ARSA.fas -o ARSA.psiBlast.j10.h1e-10

Revision as of 13:39, 12 April 2012

Blast

# Run blast:
# blastp search of ARSA.fas against the big_80 database, written to ARSA.blast.
# -m 8 selects tabular output and -b 10000 raises the cap on reported database
# hits (flag meanings per legacy NCBI blastall usage — confirm with `blastall -`).
blastall -p blastp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -i ARSA.fas > ARSA.blast
# find unique matches
# Column 2 of the tabular output is taken as the match ID. `uniq` only collapses
# adjacent duplicate lines, so this assumes hits to the same ID are consecutive.
cut -f 2 ARSA.blast | uniq > ARSA.blast.matchIDs.uniq
# Keep one line per match: `uniq -w 44` compares only the first 44 characters of
# each line. NOTE(review): presumably 44 spans the query and match ID columns
# for this data set — verify before reusing with other IDs.
uniq -w 44 ARSA.blast > ARSA.blast.uniq
# use R to plot: 
# within R: histogram of log10 Evalue for blast run
# Column V11 of the tabular BLAST output is used as the E-value.
blastTable <- read.table("ARSA.blast.uniq", header = FALSE)
# log10(0) is -Inf, so hits reported with an E-value of 0 cannot be binned;
# drop non-finite values explicitly so the filtering is visible in the protocol.
logEval <- log10(blastTable$V11)
logEval <- logEval[is.finite(logEval)]
pdf("ARSA_blastHistogram_eVal.pdf")
# Compute the histogram without drawing it, then plot with custom labels.
logeval.hist <- hist(logEval, breaks = 100, plot = FALSE)
plot(logeval.hist, col = "blue", main = "BLAST logarithm of E-values",
     xlab = "lg(E-Value)")
# Close the PDF device so the file is flushed to disk.
dev.off()

# within R: histogram of %identity for blast run
# Column V3 of the tabular BLAST output is used as the percent identity.
blastTable <- read.table("ARSA.blast.uniq", header = FALSE)
pdf("ARSA_blastHistogram_id.pdf")
# breaks = 5 is only a suggestion; hist() chooses "pretty" bin boundaries.
id.hist <- hist(blastTable$V3, breaks = 5, plot = FALSE)
plot(id.hist, col = "blue", main = "BLAST sequence identity distribution",
     xlab = "percent identity")
# Close the PDF device so the file is flushed to disk.
dev.off()
 # create lists of UniProt IDs for analysis for GO annotations
 # Print the lines containing "1e-10" together with their line numbers (-n).
 grep -n "1e-10" ARSA.blast.uniq
 # Keep everything from line 1721 onwards. NOTE(review): 1721 is hard-coded,
 # presumably read off the grep output above; it is only valid for this exact
 # ARSA.blast.uniq and assumes the hits are ordered by E-value — re-derive it
 # if the file is regenerated.
 tail -n +1721 ARSA.blast.uniq > ARSA.blast.uniq.worseE10
 # Column 2 is taken as the match ID.
 cut -f 2 ARSA.blast.uniq.worseE10  > ARSA.blast.uniq.worseE10.matchIDs
 # Take the second "|"-delimited field of each ID. NOTE(review): presumably the
 # UniProt accession in IDs of the form db|ACCESSION|NAME — confirm.
 cut -d "|" -f 2 ARSA.blast.uniq.worseE10.matchIDs > ARSA.blast.uniq.worseE10.matchIDs.clean

PSI-Blast

# run PSI-Blast with different combinations of parameters (-j: maximum number of iterations; -h: E-value threshold for including hits in the profile)
> blastpgp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -j 2 -i ARSA.fas -o ARSA.psiBlast.j2.hDefault
280.470u 30.420s 5:55.56 87.4%  0+0k 18005712+1880io 792pf+0w
> blastpgp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -j 10 -i ARSA.fas -o ARSA.psiBlast.j10.hDefault
[blastpgp] ERROR: ncbiapi [000.000]  ObjMgrNextAvailEntityID failed with idx 2048
2111.750u 44.740s 36:55.25 97.3%        0+0k 12951072+11856io 1271pf+0w 
> blastpgp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -j 2 -h 1e-10 -i ARSA.fas -o ARSA.psiBlast.j2.h1e-10
> blastpgp -d /mnt/project/pracstrucfunc12/data/big/big_80 -m 8 -b 10000 -j 10 -h 1e-10 -i ARSA.fas -o ARSA.psiBlast.j10.h1e-10