Difference between revisions of "Script distance check.py"

From Bioinformatikpedia
(Created page with "<source lang="python"> #!/usr/bin/python import sys import os import re import numpy import math input=sys.argv[1]#contact.out in_pdb=sys.argv[2]#pdb file with actual coordina…")
 
 
(2 intermediate revisions by the same user not shown)
Line 1: Line 1:
  +
This program <code> check_distance> uses the pdb coordinates to calculate the distance between predicted contacts with an CN>1. Contacts with a distance >5 Angström are classified as FP, otherwise the are TP. The distances are calculated using the euklidean distance formula.
 
<source lang="python">
 
<source lang="python">
 
#!/usr/bin/python
 
#!/usr/bin/python
Line 10: Line 11:
   
 
input=sys.argv[1]#contact.out
 
input=sys.argv[1]#contact.out
in_pdb=sys.argv[2]#pdb file with actual coordinates
+
in_pdb=sys.argv[2]#pdb file of reference structure
   
   
 
residues=[]
 
residues=[]
 
CN_dist=[]#[pos1, pos2, cn, state] state~TP or FP
 
CN_dist=[]#[pos1, pos2, cn, state] state~TP or FP
  +
coordinates={}# contains x,y,z coordinates in a list of all atoms for every residue. {residue: [[x,y,z],[x,y,z],......]}
coordinates={}
 
   
 
#calculates euklidean distance between two atoms
 
#calculates euklidean distance between two atoms
Line 34: Line 35:
 
return "FP"
 
return "FP"
   
#checks TP oor FP for all residue pairs
+
#checks TP or FP for all residue pairs
 
def all_dist(res):
 
def all_dist(res):
 
for i in res:#for every residue pair
 
for i in res:#for every residue pair
if(i[5]>1 ):
+
if(i[5]>1 ):
#print(i)
 
#print(coordinates.has_key(i[0]))
 
 
atoms1=coordinates[i[0]]#of all atoms of residue 1 (list of dictionarys)
 
atoms1=coordinates[i[0]]#of all atoms of residue 1 (list of dictionarys)
 
atoms2=coordinates[i[2]]#of all atoms of residue 2 (list of dictionarys)
 
atoms2=coordinates[i[2]]#of all atoms of residue 2 (list of dictionarys)
   
 
state=any_atom(atoms1, atoms2)
 
state=any_atom(atoms1, atoms2)
 
 
CN_dist.append([i[0],i[1],i[2],i[3],i[5],state])
 
CN_dist.append([i[0],i[1],i[2],i[3],i[5],state])
   
  +
  +
# stores all coordinates of all atoms of the structure
 
def read_pdb():
 
def read_pdb():
 
file=open(in_pdb, 'r')
 
file=open(in_pdb, 'r')
Line 59: Line 59:
 
else:
 
else:
 
res_co.append({"x":float(info[6]),"y":float(info[7]),"z":float(info[8])})
 
res_co.append({"x":float(info[6]),"y":float(info[7]),"z":float(info[8])})
  +
 
#print(info)
 
 
coordinates[int(info[5])]=res_co #key:aa position
 
coordinates[int(info[5])]=res_co #key:aa position
 
 
file.close()
 
file.close()
  +
   
 
#info[position1, residue1, position2, residue2, MI, CN]
 
#info[position1, residue1, position2, residue2, MI, CN]

Latest revision as of 20:54, 3 September 2013

This program check_distance> uses the pdb coordinates to calculate the distance between predicted contacts with an CN>1. Contacts with a distance >5 Angström are classified as FP, otherwise the are TP. The distances are calculated using the euklidean distance formula. <source lang="python">

  1. !/usr/bin/python

import sys import os import re import numpy import math


input=sys.argv[1]#contact.out in_pdb=sys.argv[2]#pdb file of reference structure


residues=[] CN_dist=[]#[pos1, pos2, cn, state] state~TP or FP coordinates={}# contains x,y,z coordinates in a list of all atoms for every residue. {residue: [[x,y,z],[x,y,z],......]}

  1. calculates euklidean distance between two atoms

def calc_dist(co1, co2):#dictionary with xyz

       p1=numpy.array((co1["x"],co1["y"],co1["z"]))
       p2=numpy.array((co2["x"],co2["y"],co2["z"]))
       dist=numpy.linalg.norm(p1-p2)
       return dist
  1. checks distance between any atom of two residues

def any_atom(atom1_list, atom2_list):

               for a1 in atom1_list:
                       for a2 in atom2_list:
                               dist=calc_dist(a1,a2)
                               if(dist<5.0):
                                       return "TP"
               return "FP"
  1. checks TP or FP for all residue pairs

def all_dist(res):

       for i in res:#for every residue pair
               if(i[5]>1 ):                                    
                       atoms1=coordinates[i[0]]#of all atoms of residue 1 (list of dictionarys)
                       atoms2=coordinates[i[2]]#of all atoms of residue 2 (list of dictionarys)
                       state=any_atom(atoms1, atoms2)
                       CN_dist.append([i[0],i[1],i[2],i[3],i[5],state])


  1. stores all coordinates of all atoms of the structure

def read_pdb():

       file=open(in_pdb, 'r')
       lines=file.readlines()
       for line in lines:
               if(re.match("^ATOM",line)):
                       info=line.split()
                       if(info[2]=="N"):#every new residue starts with N
                               res_co=[{"x":float(info[6]),"y":float(info[7]),"z":float(info[8])}] #list of coordinates of all atoms
                       else:
                               res_co.append({"x":float(info[6]),"y":float(info[7]),"z":float(info[8])})
                      
                       coordinates[int(info[5])]=res_co #key:aa position
       file.close()


  1. info[position1, residue1, position2, residue2, MI, CN]

def read_input():

       file=open(input, 'r')
       lines=file.readlines()
       for line in lines:
               text=line.lstrip().rstrip('\n')
               info=text.split()
               info[0]=int(info[0])
               info[2]=int(info[2])
               info[4]=float(info[4])
               info[5]=float(info[5])
               residues.append(info)
       file.close()
  1. writes CN_dist into file

def make_output(a):

       file_name="corelations_%s"%(input)
       file=open(file_name, 'w')
       for i in a:
               file.write("%i %s %i %s %f %s\n"%(i[0],i[1],i[2],i[3],i[4],i[5]))
       file.close()


if __name__ == '__main__':

       read_input()
       read_pdb()
       all_dist(residues)
       make_output(CN_dist)


</source>