Difference between revisions of "Task3 Hemochromatosis Protocol"
Bernhoferm (talk | contribs) (→Transmembrane Helices) |
(→Secondary Structure) |
||
(8 intermediate revisions by one other user not shown) | |||
Line 40: | Line 40: | ||
wget http://www.uniprot.org/uniprot/P11279.fasta |
wget http://www.uniprot.org/uniprot/P11279.fasta |
||
</source> |
</source> |
||
+ | |||
+ | <br style="clear:both;"> |
||
=== Secondary Structure === |
=== Secondary Structure === |
||
Line 55: | Line 57: | ||
reprof -i Q08209.fasta -o ../pred/ &> ../Q08209.log |
reprof -i Q08209.fasta -o ../pred/ &> ../Q08209.log |
||
</source> |
</source> |
||
+ | |||
+ | |||
+ | For the PsiPred predictions we used the [http://bioinf.cs.ucl.ac.uk/psipred/ PsiPred Webserver 3.0] with default settings (Mask low complexity regions on). |
||
+ | |||
+ | |||
+ | DSSP secondary structure assignments were made with the [http://mrs.cmbi.ru.nl/hsspsoap/ DSSP Webserver]. PDB files used: |
||
+ | * Q30201 -> 1A6Z |
||
+ | * P10775 -> 2BNH |
||
+ | * Q9X0E6 -> 1KR4 |
||
+ | * Q08209 -> 1AUI |
||
+ | |||
+ | The DSSP secondary structure was extracted with the following Perl script: |
||
+ | <source lang="perl"> |
||
#!/usr/bin/perl
#
# Extract the amino-acid sequence and the secondary structure of the first
# chain from a DSSP file.
#
# Usage: <this script> <file.dssp>
#
# Prints three lines to STDOUT:
#   1. the sequence ('*' stands for residue numbers missing from the file),
#   2. the raw DSSP states (blank state and missing residues written as 'C'),
#   3. the states reduced to three classes (H/G/I -> H, B/E -> E, T/S -> C).
# Chain-discontinuity notices go to STDERR so they cannot end up in the
# sequence/structure output.

use strict;
use warnings;

# Zero-based offsets inside a DSSP residue record. 13 and 16 are the offsets
# the script has always read; the residue number sits in the five characters
# in front of the insertion-code/chain columns.
my $RESNR_OFFSET = 5;
my $RESNR_WIDTH  = 5;
my $AA_OFFSET    = 13;
my $SS_OFFSET    = 16;

my $dssp_file = $ARGV[0];
die "Usage: $0 <file.dssp>\n" unless defined $dssp_file;

my $sec_structure = "";
my $sequence      = "";

# NOTE(review): starting at -1 pads the output so that the string index equals
# the residue number (a chain starting at residue 1 gets one leading
# placeholder). Kept as in the original -- confirm this is intended.
my $last_res_nr = -1;

# Three-argument open with a lexical handle; report the real OS error.
open(my $dssp_fh, '<', $dssp_file)
    or die "Cannot open DSSP file '$dssp_file': $!\n";

my $residues_started = 0;

LINE:
while (my $line = <$dssp_fh>) {
    chomp $line;

    # Skip the header; residue records start after the column-title line.
    if (!$residues_started) {
        $residues_started = 1
            if $line =~ m/#\s*RESIDUE\s*AA\s*STRUCTURE/i;
        next LINE;
    }

    # Too short to hold a structure column (blank or truncated line).
    next LINE if length($line) <= $SS_OFFSET;

    my $aa = substr($line, $AA_OFFSET, 1);

    if ($aa eq '!') {
        # '!*' separates two chains: only the first chain is extracted.
        last LINE if substr($line, $AA_OFFSET + 1, 1) eq '*';

        # A plain '!' is a discontinuity inside the chain. The gap itself is
        # filled below from the jump in the residue numbering.
        warn "Chain discontinuity in '$dssp_file': $line\n";
        next LINE;
    }

    # Read the residue number from its fixed column instead of relying on a
    # capture variable of a match that may have failed (e.g. negative numbers).
    my ($res_nr) = substr($line, $RESNR_OFFSET, $RESNR_WIDTH) =~ m/(-?\d+)/;
    if (!defined $res_nr) {
        warn "Skipping line without residue number: $line\n";
        next LINE;
    }

    # Pad residues that are absent from the structure.
    my $missing = $res_nr - $last_res_nr - 1;
    if ($missing > 0) {
        $secStructurePad: {
            $sec_structure .= 'C' x $missing;
            $sequence      .= '*' x $missing;
        }
    }
    $last_res_nr = $res_nr;

    # DSSP writes SS-bridged cysteines as lowercase letters; restore 'C'.
    $aa = 'C' if $aa =~ m/[a-z]/;
    $sequence .= $aa;

    my $state = substr($line, $SS_OFFSET, 1);
    $state = 'C' if $state eq ' ';
    $sec_structure .= $state;
}

close($dssp_fh);

print "$sequence\n";
print "$sec_structure\n";

# Reduce the DSSP states to helix (H), strand (E) and coil (C).
$sec_structure =~ tr/HGIBETS/HHHEECC/;

print "$sec_structure\n";
||
+ | </source> |
||
+ | |||
+ | |||
+ | The SOV and Q3 Values were calculated with the following Java-program: |
||
+ | |||
+ | <source lang="java"> |
||
+ | |||
/**
 * Scores three-state secondary-structure predictions (H, E, C) against the
 * DSSP-derived reference strings: per-residue accuracy (Q3 and the per-state
 * variants) and the segment overlap measure (SOV).
 *
 * Both strings of a pair are expected to have the same length. If they do
 * not, only the positions present in both strings are compared; the
 * surplus tail of the longer string is ignored instead of raising a
 * StringIndexOutOfBoundsException.
 */
public class main {

    /** Benchmark proteins: { PDB id, observed (DSSP) states, predicted states }. */
    private static final String[][] CASES = {
        { "1AUI",
          "CCCCCCCCCCCCCCCCCCCCCCCCCCCCECHHHHECCCCCECHHHHHHHHHCCCCECHHHHHHHHHHHHHHHHCCCCEEEECCCEEEECCCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCCCCCHHHHHHHHHHHHHHCCCCEEECCCCCCCHHHHHHCCHHHHHHHHCCHHHHHHHHHHHCCCCCEEEECCCEEEECCCCCCCCCCHHHHHHCCCCCCCCCCCHHHHHHHCEECCCCCCCCCCCCEEECCCCCCCEEECHHHHHHHHHHCCCCEEEECCCCCCCCEEECCECCCCCCECEEEECCCCCHHHCCCCCEEEEEEECCEEEEEEECCCCCCCCCHHHCCHHHHHHHHHHHHHHHHHHHHHCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCHHHHHHHHHHHHCCCCC",
          "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCHHHHHHHHHHCCCCCHHHHHHHHHHHHHHHHHCCCCEEECCCEEEECCCCCHHHHHHHHHHHCCCCCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHCCCCCEEEECCCCCCCCCCCCCCHHHHHHHHCCHHHHHHHHHHCCCCHHHHHCCCCEEEEECCCCCCCCCHHHHCCCCCCCCCCCCCCCCHHCCCCCCCCCCCCCCCCCCCCCCCCCCEEECCHHHHHHHHHHCCCCHHHHHHHHHHHCCCCCCCCCCCCCCCEEEEECCCCCCCCCCCCEEEEEEECCCCEEEEEECCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHCCCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHCCCCCCCCCCHHHHHHHHHHHCCCCCC" },
        { "1A6Z",
          "CCCCCCEEEEEEEEEEECCCCCCECCEEEEEECCEEEEEEECCCCCEEECCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHCCCCCCCCCEEEEEEEEEECCCCCEEEEEEEEECCEEEEEEEHHHCEEEECCHHHHHHHHHHHCCCHHHHHHHHHHHCHHHHHHHHHHHHHCCCCCCCECCEEEEEEEECCCCEEEEEEEEEEECCCCEEEEEECCEECCHHHCCCCEEEECCCCCEEEEEEEEECCCHHHHEEEEEECCCCCCCEEEEC",
          "CCCCCCCCCCEEEEEECCCCCCCCEEEEEEEECCEEEEEECCCCCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHCCCCCCCCCEEEEECCCEECCCCCCCCEEEECCCCCCCCCCCCCCCCEECCCCHHHHHHHHHHHHHHHHHHHHCCCCCCHHHHHHHHHHCCCCCCCCCCCCCEEEECCCCCCCCEEEEEECCCCCCCCEEEEEECCCCCCCCCCCCCCCEECCCCCCEEEEEEEECCCCCCCEEEEEECCCCCCCEEEEE" },
        { "1KR4",
          "EEEEEEEECCHHHHHHHHHHHHHCCCCCEEEEEEEEEEEEECCEEEEEEEEEEEEEEEHHHHHHHHHHHHHHCCCCCCCEEEECCCCEEHHHHHHHHHHCC",
          "CEEEEECCCCHHHHHHHHHHHHHCCCCCEEEEEEEEEEEEECCCEEECCEEEEEEECCCCCHHHHHHHHHHHCCCCCCEEEEEECCCCCHHHHHHHHHHCC" },
        { "2BNH",
          "CECCEECCCCCHHHHHHHHHHHCCCCEEEEECCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCHHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHCHHHHHHHCCCCCEEECCCCCCHHHHHHHHHHHHHCCCCCCCEEECCCCCCEHHHHHHHHHHHHHCCCCCEEECCCCECHHHHHHHHHHHHHCCCCCCCEEECCCCCCCHHHHHHHHHHHHHCCCCCEEECCCCCCHHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHHHHHHHHHCCCCCEEECCCCCCHHHHHHHHHHHHCCCCCCCCEEECCCCCCEHHHHHHHHHHHHHCCCCCEEECCCCECHHHHHHHHHHHCCCCCCCCCEEECCCCCCCHHHHHHHHHHHHHCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHHHHHHHHHCCCCEEEC",
          "CEEECCCCCCCHHHHHHHHHHHCCCCEEEECCCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEEEECCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEEEECCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCCHHHHHHHHHHCCCCCCCCCEEECCCCCCCHHHHHHHHHHHHCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHHHHHHHHCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEEECCCCCCHHHHHHHHHHHHCCCCCCEEECCCCCCCCHHHHHHHHHCCCCCCCEEEEECCCCCCCHHHHHHHHHHHHCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHHHHHHHHCCCCCEECC" },
    };

    /**
     * Prints the Q3, per-state Q and SOV scores for every benchmark protein.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        for (String[] entry : CASES) {
            String dssp = entry[1];
            String predicted = entry[2];
            System.out.println(entry[0]);
            System.out.println("Der Q3 Score war :"+Q3Score(predicted,dssp)+"\nDer Q3EScore war :"+QEScore(predicted,dssp)+"\nDer Q3HScore war :"+QHScore(predicted,dssp)+"\nDer Q3CScore war :"+QCScore(predicted,dssp));
            System.out.println("DerSOV war :"+SOVScore(predicted,dssp));
        }
    }

    /**
     * Percentage of positions whose predicted state equals the observed one.
     * The denominator is the length of the prediction, so predicted positions
     * without an observed counterpart count as wrong. NaN for an empty
     * prediction.
     *
     * @param predictedSequence predicted states
     * @param observedSequence  observed (reference) states
     * @return Q3 in percent
     */
    static double Q3Score(String predictedSequence, String observedSequence){
        int compared = commonLength(predictedSequence, observedSequence);
        int correctCounter = 0;
        for (int i = 0; i < compared; i++) {
            if (predictedSequence.charAt(i) == observedSequence.charAt(i)) {
                correctCounter++;
            }
        }
        return 100*(double)correctCounter/(double)predictedSequence.length();
    }

    /** Percentage of observed 'E' positions that were predicted as 'E'. */
    static double QEScore(String predictedSequence, String observedSequence){
        return stateScore(predictedSequence, observedSequence, 'E');
    }

    /** Percentage of observed 'H' positions that were predicted as 'H'. */
    static double QHScore(String predictedSequence, String observedSequence){
        return stateScore(predictedSequence, observedSequence, 'H');
    }

    /** Percentage of observed 'C' positions that were predicted as 'C'. */
    static double QCScore(String predictedSequence, String observedSequence){
        return stateScore(predictedSequence, observedSequence, 'C');
    }

    /**
     * Shared implementation of the per-state Q scores.
     *
     * @return 100 * (positions where both strings hold {@code state}) /
     *         (positions where the observed string holds {@code state});
     *         NaN when the observed string never holds {@code state}
     */
    private static double stateScore(String predictedSequence, String observedSequence, char state){
        int compared = commonLength(predictedSequence, observedSequence);
        int correctCounter = 0;
        int observedCounter = 0;
        for (int i = 0; i < compared; i++) {
            if (observedSequence.charAt(i) != state) {
                continue;
            }
            observedCounter++;
            if (predictedSequence.charAt(i) == state) {
                correctCounter++;
            }
        }
        return 100*(double)correctCounter/(double)observedCounter;
    }

    /**
     * Total segment overlap score over the states H, E and C.
     *
     * @return 100 * (sum of the per-state overlap sums) / N, where N is the
     *         sum of the per-state normalisation values returned by SOVi;
     *         NaN when no state occurs in the observed string
     */
    static double SOVScore(String predictedSequence, String observedSequence){
        double[] tempValuesH=SOVi(predictedSequence,observedSequence,'H');
        double[] tempValuesE=SOVi(predictedSequence,observedSequence,'E');
        double[] tempValuesC=SOVi(predictedSequence,observedSequence,'C');
        double n= tempValuesC[1]+tempValuesE[1]+tempValuesH[1];

        double sumE=tempValuesE[0];
        double sumH=tempValuesH[0];
        double sumC=tempValuesC[0];
        // Same evaluation order as before so the printed digits do not change.
        return 100*(1/n)* (sumE+sumH+sumC);
    }

    /**
     * Overlap sum and normalisation value for one state.
     *
     * Walks the observed and predicted segments of {@code character} from
     * left to right. Every overlapping (observed, predicted) pair adds its
     * minmaxov term to the sum and the observed segment length to N; an
     * observed segment without any overlapping predicted segment adds its
     * length to N only.
     *
     * @return { overlap sum, N }
     */
    private static double[] SOVi(String predictedSequence, String observedSequence,char character){
        int limit = commonLength(predictedSequence, observedSequence);
        double sovSummand = 0;
        int matchedLength = 0;    // observed lengths, counted once per overlapping pair
        int unmatchedLength = 0;  // observed lengths of segments without any overlap

        int obsStart = segmentStart(observedSequence, character, 0, limit);
        int obsEnd = segmentEnd(observedSequence, character, obsStart, limit);
        int predStart = segmentStart(predictedSequence, character, 0, limit);
        int predEnd = segmentEnd(predictedSequence, character, predStart, limit);
        boolean obsHasOverlap = false;

        while (obsStart >= 0) {
            if (predStart < 0 || predStart > obsEnd) {
                // No (further) predicted segment reaches this observed segment.
                if (!obsHasOverlap) {
                    unmatchedLength += obsEnd - obsStart + 1;
                }
                obsStart = segmentStart(observedSequence, character, obsEnd + 1, limit);
                obsEnd = segmentEnd(observedSequence, character, obsStart, limit);
                obsHasOverlap = false;
            } else if (predEnd < obsStart) {
                // Predicted segment lies entirely before the observed one.
                predStart = segmentStart(predictedSequence, character, predEnd + 1, limit);
                predEnd = segmentEnd(predictedSequence, character, predStart, limit);
            } else {
                sovSummand += minmaxov(obsStart, obsEnd, predStart, predEnd);
                matchedLength += obsEnd - obsStart + 1;
                obsHasOverlap = true;

                // Drop whichever segment ends first (both when they end together);
                // the longer one may still overlap the next segment of the other string.
                boolean predFinished = predEnd <= obsEnd;
                boolean obsFinished = obsEnd <= predEnd;
                if (predFinished) {
                    predStart = segmentStart(predictedSequence, character, predEnd + 1, limit);
                    predEnd = segmentEnd(predictedSequence, character, predStart, limit);
                }
                if (obsFinished) {
                    obsStart = segmentStart(observedSequence, character, obsEnd + 1, limit);
                    obsEnd = segmentEnd(observedSequence, character, obsStart, limit);
                    obsHasOverlap = false;
                }
            }
        }

        return new double[] { sovSummand, matchedLength + unmatchedLength };
    }

    /**
     * Term behind the sum symbol of the SOV formula for one segment pair:
     * (minov + delta) / maxov * length of the first segment.
     *
     * @param start1 first index of the first (observed) segment
     * @param end1   last index of the first (observed) segment
     * @param start2 first index of the second (predicted) segment
     * @param end2   last index of the second (predicted) segment
     */
    private static double minmaxov(int start1, int end1, int start2,int end2){
        int lenS1=end1-start1+1;
        int lenS2=end2-start2+1;

        // minov: length of the actual overlap; maxov: total extent of both segments.
        int minov= Math.min(end1, end2)-Math.max(start1, start2)+1;
        int maxov= Math.max(end1, end2)-Math.min(start1, start2)+1;
        // Integer division is intended: half lengths are rounded down.
        int delta=Math.min((maxov-minov), Math.min(minov, Math.min(lenS1/2, lenS2/2)));

        return ((minov+delta)/(double)maxov)*lenS1;
    }

    /** Number of leading positions that exist in both strings. */
    private static int commonLength(String a, String b){
        return Math.min(a.length(), b.length());
    }

    /** Index of the first position in [from, limit) holding {@code state}, or -1. */
    private static int segmentStart(String s, char state, int from, int limit){
        for (int i = from; i < limit; i++) {
            if (s.charAt(i) == state) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Last index of the run of {@code state} that begins at {@code start}
     * (the run is cut at {@code limit}); -1 when {@code start} is -1.
     */
    private static int segmentEnd(String s, char state, int start, int limit){
        if (start < 0) {
            return -1;
        }
        int end = start;
        while (end + 1 < limit && s.charAt(end + 1) == state) {
            end++;
        }
        return end;
    }

}
||
+ | </source> |
||
+ | |||
+ | <br style="clear:both;"> |
||
=== Disorder === |
=== Disorder === |
||
Line 71: | Line 434: | ||
iupred $DIS/fasta/Q08209.fasta long > $DIS/pred/Q08209.pred |
iupred $DIS/fasta/Q08209.fasta long > $DIS/pred/Q08209.pred |
||
</source> |
</source> |
||
+ | |||
+ | <br style="clear:both;"> |
||
=== Transmembrane Helices === |
=== Transmembrane Helices === |
||
Line 98: | Line 463: | ||
$KAL -f fasta -input $TMH/tmp/P47863.bgout -output $TMH/tmp/P47863.kalout |
$KAL -f fasta -input $TMH/tmp/P47863.bgout -output $TMH/tmp/P47863.kalout |
||
</source> |
</source> |
||
+ | |||
PolyPhobius predictions: |
PolyPhobius predictions: |
||
Line 115: | Line 481: | ||
$POL -poly $TMH/tmp/P47863.kalout > $TMH/pred/P47863.pol |
$POL -poly $TMH/tmp/P47863.kalout > $TMH/pred/P47863.pol |
||
</source> |
</source> |
||
+ | |||
+ | <br style="clear:both;"> |
||
=== Signal Peptides === |
=== Signal Peptides === |
||
+ | |||
+ | We used the [http://www.cbs.dtu.dk/services/SignalP/ SignalP Webserver 4.0] for the predictions. |
||
+ | Settings were not changed (i.e. we used the default settings). |
||
+ | |||
+ | <br style="clear:both;"> |
||
=== GO Terms === |
=== GO Terms === |
||
+ | |||
+ | For the GO term predictions the following webservers were used: |
||
+ | * [http://genius.embnet.dkfz-heidelberg.de/menu/cgi-bin/w2h-open/w2h.open/w2h.startthis?SIMGO=w2h.welcome GOPET] |
||
+ | * [http://www.cbs.dtu.dk/services/ProtFun/ ProtFun 2.2] |
||
+ | |||
+ | <br style="clear:both;"> |
||
+ | |||
+ | == Other Scripts == |
||
+ | |||
+ | <br style="clear:both;"> |
Latest revision as of 10:03, 20 May 2012
Contents
Data Acquisition
Retrieve all sequences: <source lang="bash">
#!/bin/bash
# Download the UniProt FASTA files needed by each prediction task.

BASE="/mnt/home/student/bernhoferm/mstrprkt/task3"
UNIPROT="http://www.uniprot.org/uniprot"

# fetch DIR ID...: download <ID>.fasta for every ID into DIR.
fetch() {
    local dir="$1"
    shift
    # Abort if the target directory is missing so that nothing is
    # downloaded into the wrong place.
    cd "$dir" || exit 1
    local id
    for id in "$@"; do
        wget "$UNIPROT/$id.fasta"
    done
}

# Secondary Structure
fetch "$BASE/ss/fasta/" Q30201 P10775 Q9X0E6 Q08209

# Disorder
fetch "$BASE/disorder/fasta/" Q30201 P10775 Q9X0E6 Q08209

# Transmembrane Helices
fetch "$BASE/tmh/fasta/" Q30201 P35462 Q9YDF8 P47863

# Signal Peptides
fetch "$BASE/sp/fasta/" Q30201 P02768 P47863 P11279
</source>
Secondary Structure
ReProf predictions: <source lang="bash">
#!/bin/bash
# Secondary Structure: run ReProf on every downloaded FASTA file.
# Predictions are written to ../pred/, the program output of each run
# (stdout and stderr) to ../<ID>.log.

# Abort if the FASTA directory is missing; the relative paths below depend on it.
cd /mnt/home/student/bernhoferm/mstrprkt/task3/ss/fasta/ || exit 1

for id in Q30201 P10775 Q9X0E6 Q08209; do
    reprof -i "$id.fasta" -o ../pred/ &> "../$id.log"
done
</source>
For the PsiPred predictions we used the PsiPred Webserver 3.0 with default settings (Mask low complexity regions on).
DSSP secondary structure assignments were made with the DSSP Webserver. PDB files used:
- Q30201 -> 1A6Z
- P10775 -> 2BNH
- Q9X0E6 -> 1KR4
- Q08209 -> 1AUI
The DSSP secondary structure was extracted with the following Perl script: <source lang="perl">
- !/usr/bin/perl
use strict; use warnings;
my $dsspFile=$ARGV[0]; my $secStructure=""; my $sequence=""; my $lastResNr=-1; open (DSSPFILE,"<$dsspFile") or die "DSSP-file not found.\n"; while (<DSSPFILE>){ my $headerLines=$_; #print "damn\n"; if ($headerLines=~m/\s*#\s*RESIDUE\s*AA\s*STRUCTURE/gi){ while (<DSSPFILE>){
my $line=$_; if ($line=~m/\s*\d*\s*!\*/){ while (<DSSPFILE>){ } } else { if ($line=~m/^\s*\d*\s*!/){ print "$line WOOT WOOT WOOT!\n"; } else { $line=~m/^\s*\d+\s+(\d*)\s*/gi; my @lineArray=split(//,$line); # print "Hier: $1\n$2\n$3\n\n"; # print "$lineArray[13]\n"; # print "$lineArray[16]\n";
for (my $i=1;$i<$1-$lastResNr;$i++){ $secStructure=$secStructure . "C"; $sequence=$sequence . "*"; } $lastResNr=$1; $sequence=$sequence . $lineArray[13]; my $currentSS=$lineArray[16]; if ($currentSS eq " "){ $currentSS="C"; } $secStructure=$secStructure . $currentSS; } } } } } close(DSSPFILE); print "$sequence\n"; print "$secStructure\n";
$secStructure=~tr/HGIBETS/HHHEECC/;
print "$secStructure\n";
- $secStructure=~tr/HGIBETS/HHHEECC/;
- print "$secStructure\n";
</source>
The SOV and Q3 Values were calculated with the following Java-program:
<source lang="java">
public class main {
/** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub
System.out.println("1AUI"); String dssp="CCCCCCCCCCCCCCCCCCCCCCCCCCCCECHHHHECCCCCECHHHHHHHHHCCCCECHHHHHHHHHHHHHHHHCCCCEEEECCCEEEECCCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCCCCCHHHHHHHHHHHHHHCCCCEEECCCCCCCHHHHHHCCHHHHHHHHCCHHHHHHHHHHHCCCCCEEEECCCEEEECCCCCCCCCCHHHHHHCCCCCCCCCCCHHHHHHHCEECCCCCCCCCCCCEEECCCCCCCEEECHHHHHHHHHHCCCCEEEECCCCCCCCEEECCECCCCCCECEEEECCCCCHHHCCCCCEEEEEEECCEEEEEEECCCCCCCCCHHHCCHHHHHHHHHHHHHHHHHHHHHCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCHHHHHHHHHHHHCCCCC"; String predicted="CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCHHHHHHHHHHCCCCCHHHHHHHHHHHHHHHHHCCCCEEECCCEEEECCCCCHHHHHHHHHHHCCCCCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHCCCCCEEEECCCCCCCCCCCCCCHHHHHHHHCCHHHHHHHHHHCCCCHHHHHCCCCEEEEECCCCCCCCCHHHHCCCCCCCCCCCCCCCCHHCCCCCCCCCCCCCCCCCCCCCCCCCCEEECCHHHHHHHHHHCCCCHHHHHHHHHHHCCCCCCCCCCCCCCCEEEEECCCCCCCCCCCCEEEEEEECCCCEEEEEECCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHCCCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHCCCCCCCCCCHHHHHHHHHHHCCCCCC"; System.out.println("Der Q3 Score war :"+Q3Score(predicted,dssp)+"\nDer Q3EScore war :"+QEScore(predicted,dssp)+"\nDer Q3HScore war :"+QHScore(predicted,dssp)+"\nDer Q3CScore war :"+QCScore(predicted,dssp)); System.out.println("DerSOV war :"+SOVScore(predicted,dssp)); System.out.println("1A6Z"); dssp="CCCCCCEEEEEEEEEEECCCCCCECCEEEEEECCEEEEEEECCCCCEEECCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHCCCCCCCCCEEEEEEEEEECCCCCEEEEEEEEECCEEEEEEEHHHCEEEECCHHHHHHHHHHHCCCHHHHHHHHHHHCHHHHHHHHHHHHHCCCCCCCECCEEEEEEEECCCCEEEEEEEEEEECCCCEEEEEECCEECCHHHCCCCEEEECCCCCEEEEEEEEECCCHHHHEEEEEECCCCCCCEEEEC"; predicted="CCCCCCCCCCEEEEEECCCCCCCCEEEEEEEECCEEEEEECCCCCCCCCCCCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHHHHHCCCCCCCCCEEEEECCCEECCCCCCCCEEEECCCCCCCCCCCCCCCCEECCCCHHHHHHHHHHHHHHHHHHHHCCCCCCHHHHHHHHHHCCCCCCCCCCCCCEEEECCCCCCCCEEEEEECCCCCCCCEEEEEECCCCCCCCCCCCCCCEECCCCCCEEEEEEEECCCCCCCEEEEEECCCCCCCEEEEE"; System.out.println("Der Q3 Score war :"+Q3Score(predicted,dssp)+"\nDer Q3EScore war 
:"+QEScore(predicted,dssp)+"\nDer Q3HScore war :"+QHScore(predicted,dssp)+"\nDer Q3CScore war :"+QCScore(predicted,dssp)); System.out.println("DerSOV war :"+SOVScore(predicted,dssp)); System.out.println("1KR4"); dssp="EEEEEEEECCHHHHHHHHHHHHHCCCCCEEEEEEEEEEEEECCEEEEEEEEEEEEEEEHHHHHHHHHHHHHHCCCCCCCEEEECCCCEEHHHHHHHHHHCC"; predicted="CEEEEECCCCHHHHHHHHHHHHHCCCCCEEEEEEEEEEEEECCCEEECCEEEEEEECCCCCHHHHHHHHHHHCCCCCCEEEEEECCCCCHHHHHHHHHHCC"; System.out.println("Der Q3 Score war :"+Q3Score(predicted,dssp)+"\nDer Q3EScore war :"+QEScore(predicted,dssp)+"\nDer Q3HScore war :"+QHScore(predicted,dssp)+"\nDer Q3CScore war :"+QCScore(predicted,dssp)); System.out.println("DerSOV war :"+SOVScore(predicted,dssp)); System.out.println("2BNH"); dssp="CECCEECCCCCHHHHHHHHHHHCCCCEEEEECCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCHHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHCHHHHHHHCCCCCEEECCCCCCHHHHHHHHHHHHHCCCCCCCEEECCCCCCEHHHHHHHHHHHHHCCCCCEEECCCCECHHHHHHHHHHHHHCCCCCCCEEECCCCCCCHHHHHHHHHHHHHCCCCCEEECCCCCCHHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHHHHHHHHHCCCCCEEECCCCCCHHHHHHHHHHHHCCCCCCCCEEECCCCCCEHHHHHHHHHHHHHCCCCCEEECCCCECHHHHHHHHHHHCCCCCCCCCEEECCCCCCCHHHHHHHHHHHHHCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHHHHHHHHHCCCCEEEC"; predicted="CEEECCCCCCCHHHHHHHHHHHCCCCEEEECCCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEEEECCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEEEECCCCCHHHHHHHHHHHCCCCCCCEEECCCCCCCHHHHHHHHHHCCCCCCCCCEEECCCCCCCHHHHHHHHHHHHCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHHHHHHHHCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEEECCCCCCHHHHHHHHHHHHCCCCCCEEECCCCCCCCHHHHHHHHHCCCCCCCEEEEECCCCCCCHHHHHHHHHHHHCCCCCCEEECCCCCCCHHHHHHHHHHHCCCCCCCCEEECCCCCCCHHHHHHHHHHHHCCCCCEECC"; System.out.println("Der Q3 Score war :"+Q3Score(predicted,dssp)+"\nDer Q3EScore war :"+QEScore(predicted,dssp)+"\nDer Q3HScore war :"+QHScore(predicted,dssp)+"\nDer Q3CScore war :"+QCScore(predicted,dssp)); System.out.println("DerSOV war :"+SOVScore(predicted,dssp));
}
//calculate Q3Score private static double Q3Score(String predictedSequence, String observedSequence){ int correctCounter=0;
for (int i=0;i<predictedSequence.length();i++){ if (predictedSequence.charAt(i)==observedSequence.charAt(i)){ correctCounter++; } }
return 100*(double)correctCounter/(double)predictedSequence.length(); }
//calculate QEScore private static double QEScore(String predictedSequence, String observedSequence){ int correctCounter=0; int eCounter=0; for (int i=0;i<predictedSequence.length();i++){ if (predictedSequence.charAt(i)=='E'&&observedSequence.charAt(i)=='E'){ correctCounter++; } if (observedSequence.charAt(i)=='E'){ eCounter++; } }
return 100*(double)correctCounter/(double)eCounter; }
//calculate QHScore private static double QHScore(String predictedSequence, String observedSequence){ int correctCounter=0; int hCounter=0; for (int i=0;i<predictedSequence.length();i++){ if (predictedSequence.charAt(i)=='H'&&observedSequence.charAt(i)=='H'){ correctCounter++; } if (observedSequence.charAt(i)=='H'){ hCounter++; } }
return 100*(double)correctCounter/(double)hCounter; }
//calculate QCScore private static double QCScore(String predictedSequence, String observedSequence){ int correctCounter=0; int cCounter=0; for (int i=0;i<predictedSequence.length();i++){ if (predictedSequence.charAt(i)=='C'&&observedSequence.charAt(i)=='C'){ correctCounter++; } if(observedSequence.charAt(i)=='C'){ cCounter++; } }
return 100*(double)correctCounter/(double)cCounter; }
//calculate total SOVScore private static double SOVScore(String predictedSequence, String observedSequence){ double[] tempValuesH=SOVi(predictedSequence,observedSequence,'H'); double[] tempValuesE=SOVi(predictedSequence,observedSequence,'E'); double[] tempValuesC=SOVi(predictedSequence,observedSequence,'C'); double n= tempValuesC[1]+tempValuesE[1]+tempValuesH[1];
double sumE=tempValuesE[0]; double sumH=tempValuesH[0]; double sumC=tempValuesC[0]; return 100*(1/n)* (sumE+sumH+sumC);
}
// calculate SOV(i) values private static double[] SOVi(String predictedSequence, String observedSequence,char character){ int startpos1=-1; int endpos1=-1; int startpos2=-1; int endpos2=-1; double sovSummand=0; int totalLenS1=0; int totalLenS1notfound=0; double[] returnvalues=new double[2]; int lastUsedEndpos2=-1;
predictedSequence=predictedSequence+"I"; //I is appended to evaluate the last character if the original-sequence observedSequence=observedSequence+"I";
for (int i=0;i<predictedSequence.length();i++){
if (i>endpos1&&endpos1>=0){ endpos1=-1; startpos1=-1; } if (i>endpos2&&endpos2>=0&&lastUsedEndpos2<endpos2){ totalLenS1notfound+=(endpos2-startpos2+1); endpos2=-1; startpos2=-1; } else if(i>endpos2&&endpos2>=0){ endpos2=-1; startpos2=-1; }
//finding a sequence on predicted
if (startpos1<0 && predictedSequence.charAt(i)==character){
startpos1=i;
for (int j=i;j<predictedSequence.length();j++){
if (predictedSequence.charAt(j)!=character&&endpos1<0){
endpos1=j-1;
j=predictedSequence.length();
//if a sequence on the observed sequence exists, an overlap has been found-->calculate SOV-sum
if (startpos2>=0){
sovSummand+= minmaxov(startpos2,endpos2,startpos1,endpos1);
totalLenS1+=endpos2-startpos2+1;
if (endpos1==endpos2){
lastUsedEndpos2=endpos2;
i=endpos2+1;
endpos1=-1;
startpos1=-1;
endpos2=-1;
startpos2=-1;
} else if (endpos1<endpos2){
lastUsedEndpos2=endpos2;
i=endpos1+1;
endpos1=-1;
startpos1=-1;
}else {
lastUsedEndpos2=endpos2;
i=endpos2+1;
startpos2=-1;
endpos2=-1;
} } } } } // finding a sequence on observed if (i>endpos2&&endpos2>=0&&lastUsedEndpos2<endpos2){ totalLenS1notfound+=(endpos2-startpos2+1); endpos2=-1; startpos2=-1; }else if(i>endpos2&&endpos2>=0){ endpos2=-1; startpos2=-1; }
//if a sequence on the predicted sequence exists, an overlap has been found-->calculate SOV-sum if (startpos2<0 && observedSequence.charAt(i)==character){ startpos2=i; for (int j=i;j<observedSequence.length();j++){ if (observedSequence.charAt(j)!=character&&endpos2<0){ endpos2=j-1; j=observedSequence.length(); if (startpos1>=0){ totalLenS1+=endpos2-startpos2+1;
sovSummand+= minmaxov(startpos2,endpos2,startpos1,endpos1);
if (endpos1==endpos2){
i=endpos2+1;
lastUsedEndpos2=endpos2;
endpos1=-1;
startpos1=-1;
endpos2=-1;
startpos2=-1;
} else if (endpos1<endpos2){
i=endpos1+1;
endpos1=-1;
startpos1=-1;
lastUsedEndpos2=endpos2;
}else {
lastUsedEndpos2=endpos2;
i=endpos2+1;
endpos2=-1;
startpos2=-1;
}
}
}
} }
}
returnvalues[0]=sovSummand; returnvalues[1]=totalLenS1+totalLenS1notfound; //the N-value
return returnvalues;
}
// returns the value after the sum-symbol in the SOV-formula
private static double minmaxov(int start1, int end1, int start2,int end2){
int minov;
int maxov;
int delta;
int lenS1=end1-start1+1;
int lenS2=end2-start2+1;
minov= Math.min(end1, end2)-Math.max(start1, start2)+1;
maxov= Math.max(end1, end2)-Math.min(start1, start2)+1;
delta=Math.min((maxov-minov), Math.min(minov, Math.min(lenS1/2, lenS2/2)));
return ((minov+delta)/(double)maxov)*lenS1;
}
} </source>
Disorder
IUPred predictions: <source lang="bash">
#!/bin/bash
# Disorder: run IUPred (mode "long") on every downloaded FASTA file and
# store the output as <ID>.pred.

DIS="/mnt/home/student/bernhoferm/mstrprkt/task3/disorder"

# NOTE(review): presumably iupred has to be started from its installation
# directory -- confirm. Abort if that directory is missing.
cd /opt/iupred/ || exit 1

for id in Q30201 P10775 Q9X0E6 Q08209; do
    iupred "$DIS/fasta/$id.fasta" long > "$DIS/pred/$id.pred"
done
</source>
Transmembrane Helices
PolyPhobius preprocessing: <source lang="bash">
#!/bin/bash
# Transmembrane Helices: PolyPhobius preprocessing.
# For every protein, run blastget against the Swiss-Prot database and
# feed its output to kalign; the kalign output is the PolyPhobius input.

TMH="/mnt/home/student/bernhoferm/mstrprkt/task3/tmh"
BG="/mnt/project/pracstrucfunc12/polyphobius/blastget"
DB="/mnt/project/pracstrucfunc12/data/swissprot/uniprot_sprot"
DBI="/mnt/project/pracstrucfunc12/data/index_pp/uniprot_sprot.idx"
KAL="/mnt/opt/T-Coffee/bin/kalign"

for id in Q30201 P35462 Q9YDF8 P47863; do
    # The output is redirected to a file, so no command substitution
    # (backticks) is needed around the call.
    perl "$BG" -db "$DB" -ix "$DBI" "$TMH/fasta/$id.fasta" > "$TMH/tmp/$id.bgout"
    "$KAL" -f fasta -input "$TMH/tmp/$id.bgout" -output "$TMH/tmp/$id.kalout"
done
</source>
PolyPhobius predictions:
<source lang="bash">
#!/bin/bash
# Transmembrane Helices: run PolyPhobius (jphobius -poly) on every
# kalign output produced by the preprocessing step.

TMH="/mnt/home/student/bernhoferm/mstrprkt/task3/tmh"
POL="/mnt/project/pracstrucfunc12/polyphobius/jphobius"

# Make the programs in the polyphobius directory available on PATH.
PATH=$PATH:/mnt/project/pracstrucfunc12/polyphobius/
export PATH

for id in Q30201 P35462 Q9YDF8 P47863; do
    "$POL" -poly "$TMH/tmp/$id.kalout" > "$TMH/pred/$id.pol"
done
</source>
Signal Peptides
We used the SignalP Webserver 4.0 for the predictions. Settings were not changed (i.e. we used the default settings).
GO Terms
For the GO term predictions the following webservers were used:
- GOPET
- ProtFun 2.2
Other Scripts