Difference between revisions of "Journal Task4 PKU"
(→Datasets) |
|||
Line 105: | Line 105: | ||
} |
} |
||
+ | close OUT80; |
||
+ | close OUT40; |
||
+ | close OUT20; |
||
+ | </source> |
||
+ | === PDBe === |
||
+ | <source lang="perl"> |
||
+ | my $id; |
||
+ | my $Evalue; |
||
+ | my $Identities; |
||
+ | my $bool = "F"; |
||
+ | |||
+ | open(IN,"<". $ARGV[0]); |
||
+ | open(OUT80, ">dataset_80.txt"); |
||
+ | open(OUT40, ">dataset_40.txt"); |
||
+ | open(OUT20, ">dataset_20.txt"); |
||
+ | |||
+ | mkdir("output"); |
||
+ | |||
+ | while(<IN>) { |
||
+ | chomp; |
||
+ | |||
+ | if( ($_ =~ /^\d+/ )) { |
||
+ | @linesplit = split( /\t/,$_); |
||
+ | |||
+ | $id = $linesplit[0]; |
||
+ | $Evalue = $linesplit[8]; |
||
+ | $Identities = $linesplit[7]; |
||
+ | if($Evalue <= $ARGV[1] && $Identities > 80){ |
||
+ | print OUT80 "$id\t$Evalue\t$Identities\n"; |
||
+ | @args = ("wget", "http://pdb.rcsb.org/pdb/files/". $id . ".pdb", "--output-document=output/$id.pdb"); |
||
+ | system(@args) == 0 or die "system @args failed: $?" |
||
+ | } |
||
+ | elsif($Evalue <= $ARGV[1] && $Identities < 30){ |
||
+ | print OUT20 "$id\t$Evalue\t$Identities\n"; |
||
+ | @args = ("wget", "http://pdb.rcsb.org/pdb/files/". $id . ".pdb", "--output-document=output/$id.pdb"); |
||
+ | system(@args) == 0 or die "system @args failed: $?" |
||
+ | } |
||
+ | elsif($Evalue <= $ARGV[1] && $Identities > 40) |
||
+ | { |
||
+ | print OUT40 "$id\t$Evalue\t$Identities\n"; |
||
+ | @args = ("wget", "http://pdb.rcsb.org/pdb/files/". $id . ".pdb", "--output-document=output/$id.pdb"); |
||
+ | system(@args) == 0 or die "system @args failed: $?" |
||
+ | } |
||
+ | } |
||
+ | |||
+ | } |
||
close OUT80; |
close OUT80; |
||
close OUT40; |
close OUT40; |
Revision as of 15:14, 30 May 2012
Phenylketonuria » Homology based structure predictions » Journal
Contents
Datasets
To obtain homologous structures, we performed three searches using three different web services and compared the results to our findings from Task 2. Each search produced one output file; these three files were parsed, and the corresponding structure files were downloaded from the PDB using the following scripts:
HHPred
<source lang="perl">
# Parses an HHPred result file, sorts the hits that pass an E-value cutoff into
# sequence-identity buckets, and downloads the matching PDB entries with wget.
#
# Usage: perl <script> <hhpred_result_file> <max_evalue>
#
# Output (each dataset line is "<id>\t<E-value>\t<identity>"):
#   dataset_80.txt   hits with identity > 80
#   dataset_40.txt   hits with identity > 40 and <= 80
#   dataset_20.txt   hits with identity < 30
#   output/<id>.pdb  one downloaded structure per reported hit
# Hits with an identity from 30 to 40 match none of the buckets and are skipped.
# The script dies as soon as one wget call fails.
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV >= 2 or die "Usage: $0 <hhpred_result_file> <max_evalue>\n";
my ($result_file, $max_evalue) = @ARGV;
looks_like_number($max_evalue)
    or die "max_evalue must be a number, got '$max_evalue'\n";

my $download_dir = 'output';

open(my $in, '<', $result_file) or die "Cannot read $result_file: $!";

# One output handle per identity bucket, keyed by the number in the file name.
my %out_for;
for my $bucket (qw(80 40 20)) {
    my $dataset_file = "dataset_$bucket.txt";
    open($out_for{$bucket}, '>', $dataset_file)
        or die "Cannot write $dataset_file: $!";
}

-d $download_dir or mkdir($download_dir)
    or die "Cannot create directory $download_dir: $!";

my $id;
my $awaiting_stats = 0;    # true between a hit header and its statistics line

LINE: while (my $line = <$in>) {
    chomp $line;

    # Hit header: '>' followed by a four-character ID that starts with a digit.
    if ($line =~ /^>([0-9][A-Za-z0-9]{3})/) {
        $id             = $1;
        $awaiting_stats = 1;
        next LINE;
    }
    next LINE unless $awaiting_stats;

    # Only a line that matches the complete statistics pattern ends the hit;
    # a line that merely contains "Probab" is ignored instead of producing
    # empty values. \S+ keeps padding whitespace out of the captured fields.
    next LINE
        unless $line =~ /Probab=.+E-value=(\S+)\s+Score=.+Aligned_cols=.+Identities=(\S+)%\s+Similarity=.+/;
    my ($evalue, $identities) = ($1, $2);
    $awaiting_stats = 0;

    if (!looks_like_number($evalue) || !looks_like_number($identities)) {
        warn "Skipping $id: non-numeric statistics at line $.\n";
        next LINE;
    }
    next LINE unless $evalue <= $max_evalue;

    # The checks run in the order >80, <30, >40.
    my $bucket = $identities > 80 ? '80'
               : $identities < 30 ? '20'
               : $identities > 40 ? '40'
               :                    undef;
    next LINE unless defined $bucket;

    print { $out_for{$bucket} } "$id\t$evalue\t$identities\n";

    # List-form system: the arguments never pass through a shell.
    my @args = ("wget", "http://pdb.rcsb.org/pdb/files/" . $id . ".pdb",
                "--output-document=$download_dir/$id.pdb");
    system(@args) == 0 or die "system @args failed: $?";
}

close($in);
for my $bucket (qw(80 40 20)) {
    # Buffered write errors only surface when the handle is closed.
    close($out_for{$bucket}) or die "Cannot close dataset_$bucket.txt: $!";
}
</source>
Coma
<source lang="perl">
# Parses a COMA result file, sorts the hits that pass an E-value cutoff into
# sequence-identity buckets, and downloads the matching PDB entries with wget.
#
# Usage: perl <script> <coma_result_file> <max_evalue>
#
# Output (each dataset line is "<id>\t<E-value>\t<identity>\t<positives>"):
#   dataset_coma_80.txt  hits with identity > 80
#   dataset_coma_40.txt  hits with identity > 40 and <= 80
#   dataset_coma_20.txt  hits with identity < 30
#   outputcoma/<id>.pdb  one downloaded structure per reported hit
# Hits with an identity from 30 to 40 match none of the buckets and are skipped.
# The script dies as soon as one wget call fails.
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV >= 2 or die "Usage: $0 <coma_result_file> <max_evalue>\n";
my ($result_file, $max_evalue) = @ARGV;
looks_like_number($max_evalue)
    or die "max_evalue must be a number, got '$max_evalue'\n";

my $download_dir = 'outputcoma';

open(my $in, '<', $result_file) or die "Cannot read $result_file: $!";

# One output handle per identity bucket, keyed by the number in the file name.
my %out_for;
for my $bucket (qw(80 40 20)) {
    my $dataset_file = "dataset_coma_$bucket.txt";
    open($out_for{$bucket}, '>', $dataset_file)
        or die "Cannot write $dataset_file: $!";
}

-d $download_dir or mkdir($download_dir)
    or die "Cannot create directory $download_dir: $!";

my $id;
my $evalue;
my $in_hit = 0;    # true between a hit header and its Identities line

LINE: while (my $line = <$in>) {
    chomp $line;

    # Hit header: '>' followed by a four-character ID that starts with a digit.
    if ($line =~ /^>([0-9][A-Za-z0-9]{3})/) {
        $id     = $1;
        $evalue = undef;    # never carry an E-value over from the previous hit
        $in_hit = 1;
        next LINE;
    }
    next LINE unless $in_hit;

    if ($line =~ /^Score/) {
        # \S+ keeps padding whitespace out of the captured value.
        $evalue = $1 if $line =~ /.+Expect\s+=\s+(\S+)\s+P-value.+/;
        next LINE;
    }
    next LINE unless $line =~ /^Identities/;

    # The Identities line closes the hit, whether or not it can be parsed.
    $in_hit = 0;
    next LINE
        unless $line =~ /Identities.+\((\d+)%\)\s+Positives.+\((\d+)%\)\s+Gaps.+/;
    my ($identities, $positives) = ($1, $2);

    if (!defined $evalue || !looks_like_number($evalue)) {
        warn "Skipping $id: no usable E-value before line $.\n";
        next LINE;
    }
    next LINE unless $evalue <= $max_evalue;

    # The checks run in the order >80, <30, >40.
    my $bucket = $identities > 80 ? '80'
               : $identities < 30 ? '20'
               : $identities > 40 ? '40'
               :                    undef;
    next LINE unless defined $bucket;

    print { $out_for{$bucket} } "$id\t$evalue\t$identities\t$positives\n";

    # List-form system: the arguments never pass through a shell.
    my @args = ("wget", "http://pdb.rcsb.org/pdb/files/" . $id . ".pdb",
                "--output-document=$download_dir/$id.pdb");
    system(@args) == 0 or die "system @args failed: $?";
}

close($in);
for my $bucket (qw(80 40 20)) {
    # Buffered write errors only surface when the handle is closed.
    close($out_for{$bucket}) or die "Cannot close dataset_coma_$bucket.txt: $!";
}
</source>
PDBe
<source lang="perl">
# Parses a tab-separated PDBe result file, sorts the rows that pass an E-value
# cutoff into sequence-identity buckets, and downloads the matching PDB entries
# with wget.
#
# Usage: perl <script> <pdbe_result_file> <max_evalue>
#
# Only rows that start with a digit are read. Column 1 is taken as the ID,
# column 8 as the identity and column 9 as the E-value.
# NOTE(review): the column layout of the PDBe export is not visible here;
# confirm the indices against a real result file.
#
# Output (each dataset line is "<id>\t<E-value>\t<identity>"):
#   dataset_80.txt   rows with identity > 80
#   dataset_40.txt   rows with identity > 40 and <= 80
#   dataset_20.txt   rows with identity < 30
#   output/<id>.pdb  one downloaded structure per reported row
# Rows with an identity from 30 to 40 match none of the buckets and are skipped.
# The script dies as soon as one wget call fails.
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

@ARGV >= 2 or die "Usage: $0 <pdbe_result_file> <max_evalue>\n";
my ($result_file, $max_evalue) = @ARGV;
looks_like_number($max_evalue)
    or die "max_evalue must be a number, got '$max_evalue'\n";

my $download_dir = 'output';

open(my $in, '<', $result_file) or die "Cannot read $result_file: $!";

# One output handle per identity bucket, keyed by the number in the file name.
my %out_for;
for my $bucket (qw(80 40 20)) {
    my $dataset_file = "dataset_$bucket.txt";
    open($out_for{$bucket}, '>', $dataset_file)
        or die "Cannot write $dataset_file: $!";
}

-d $download_dir or mkdir($download_dir)
    or die "Cannot create directory $download_dir: $!";

LINE: while (my $line = <$in>) {
    chomp $line;
    next LINE unless $line =~ /^\d+/;

    my @fields = split(/\t/, $line);

    # A short row would give undefined values, which compare as 0 and would
    # be filed as a low-identity hit; skip it instead.
    if (@fields < 9) {
        warn "Skipping line $.: expected at least 9 tab-separated columns\n";
        next LINE;
    }
    my ($id, $identities, $evalue) = @fields[0, 7, 8];

    if (!looks_like_number($evalue) || !looks_like_number($identities)) {
        warn "Skipping $id: non-numeric statistics at line $.\n";
        next LINE;
    }
    next LINE unless $evalue <= $max_evalue;

    # The checks run in the order >80, <30, >40.
    my $bucket = $identities > 80 ? '80'
               : $identities < 30 ? '20'
               : $identities > 40 ? '40'
               :                    undef;
    next LINE unless defined $bucket;

    print { $out_for{$bucket} } "$id\t$evalue\t$identities\n";

    # List-form system: the arguments never pass through a shell.
    my @args = ("wget", "http://pdb.rcsb.org/pdb/files/" . $id . ".pdb",
                "--output-document=$download_dir/$id.pdb");
    system(@args) == 0 or die "system @args failed: $?";
}

close($in);
for my $bucket (qw(80 40 20)) {
    # Buffered write errors only surface when the handle is closed.
    close($out_for{$bucket}) or die "Cannot close dataset_$bucket.txt: $!";
}
</source>