Difference between revisions of "Script distance check.py"
(Created page with "<source lang="python"> #!/usr/bin/python import sys import os import re import numpy import math input=sys.argv[1]#contact.out in_pdb=sys.argv[2]#pdb file with actual coordina…") |
|||
(2 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
+ | This program <code> check_distance> uses the pdb coordinates to calculate the distance between predicted contacts with an CN>1. Contacts with a distance >5 Angström are classified as FP, otherwise the are TP. The distances are calculated using the euklidean distance formula. |
||
<source lang="python"> |
<source lang="python"> |
||
#!/usr/bin/python |
#!/usr/bin/python |
||
Line 10: | Line 11: | ||
input=sys.argv[1]#contact.out |
input=sys.argv[1]#contact.out |
||
− | in_pdb=sys.argv[2]#pdb file |
+ | in_pdb=sys.argv[2]#pdb file of reference structure |
residues=[] |
residues=[] |
||
CN_dist=[]#[pos1, pos2, cn, state] state~TP or FP |
CN_dist=[]#[pos1, pos2, cn, state] state~TP or FP |
||
+ | coordinates={}# contains x,y,z coordinates in a list of all atoms for every residue. {residue: [[x,y,z],[x,y,z],......]} |
||
− | coordinates={} |
||
#calculates euklidean distance between two atoms |
#calculates euklidean distance between two atoms |
||
Line 34: | Line 35: | ||
return "FP" |
return "FP" |
||
− | #checks TP |
+ | #checks TP or FP for all residue pairs |
def all_dist(res): |
def all_dist(res): |
||
for i in res:#for every residue pair |
for i in res:#for every residue pair |
||
− | if(i[5]>1 ): |
+ | if(i[5]>1 ): |
− | #print(i) |
||
− | #print(coordinates.has_key(i[0])) |
||
atoms1=coordinates[i[0]]#of all atoms of residue 1 (list of dictionarys) |
atoms1=coordinates[i[0]]#of all atoms of residue 1 (list of dictionarys) |
||
atoms2=coordinates[i[2]]#of all atoms of residue 2 (list of dictionarys) |
atoms2=coordinates[i[2]]#of all atoms of residue 2 (list of dictionarys) |
||
state=any_atom(atoms1, atoms2) |
state=any_atom(atoms1, atoms2) |
||
− | |||
CN_dist.append([i[0],i[1],i[2],i[3],i[5],state]) |
CN_dist.append([i[0],i[1],i[2],i[3],i[5],state]) |
||
+ | |||
+ | # stores all coordinates of all atoms of the structure |
||
def read_pdb(): |
def read_pdb(): |
||
file=open(in_pdb, 'r') |
file=open(in_pdb, 'r') |
||
Line 59: | Line 59: | ||
else: |
else: |
||
res_co.append({"x":float(info[6]),"y":float(info[7]),"z":float(info[8])}) |
res_co.append({"x":float(info[6]),"y":float(info[7]),"z":float(info[8])}) |
||
+ | |||
− | |||
− | #print(info) |
||
coordinates[int(info[5])]=res_co #key:aa position |
coordinates[int(info[5])]=res_co #key:aa position |
||
− | |||
file.close() |
file.close() |
||
+ | |||
#info[position1, residue1, position2, residue2, MI, CN] |
#info[position1, residue1, position2, residue2, MI, CN] |
Latest revision as of 21:54, 3 September 2013
This program check_distance> uses the pdb coordinates to calculate the distance between predicted contacts with an CN>1. Contacts with a distance >5 Angström are classified as FP, otherwise the are TP. The distances are calculated using the euklidean distance formula.
<source lang="python">
- !/usr/bin/python
import sys
import os
import re
import numpy
import math
input=sys.argv[1]#contact.out
in_pdb=sys.argv[2]#pdb file of reference structure
residues=[]
CN_dist=[]#[pos1, pos2, cn, state] state~TP or FP
coordinates={}# contains x,y,z coordinates in a list of all atoms for every residue. {residue: [[x,y,z],[x,y,z],......]}
- calculates euklidean distance between two atoms
def calc_dist(co1, co2):#dictionary with xyz
p1=numpy.array((co1["x"],co1["y"],co1["z"]))
p2=numpy.array((co2["x"],co2["y"],co2["z"]))
dist=numpy.linalg.norm(p1-p2)
return dist
- checks distance between any atom of two residues
def any_atom(atom1_list, atom2_list):
for a1 in atom1_list:
for a2 in atom2_list:
dist=calc_dist(a1,a2)
if(dist<5.0):
return "TP"
return "FP"
- checks TP or FP for all residue pairs
def all_dist(res):
for i in res:#for every residue pair
if(i[5]>1 ):
atoms1=coordinates[i[0]]#of all atoms of residue 1 (list of dictionarys)
atoms2=coordinates[i[2]]#of all atoms of residue 2 (list of dictionarys)
state=any_atom(atoms1, atoms2)
CN_dist.append([i[0],i[1],i[2],i[3],i[5],state])
- stores all coordinates of all atoms of the structure
def read_pdb():
file=open(in_pdb, 'r')
lines=file.readlines()
for line in lines:
if(re.match("^ATOM",line)):
info=line.split()
if(info[2]=="N"):#every new residue starts with N
res_co=[{"x":float(info[6]),"y":float(info[7]),"z":float(info[8])}] #list of coordinates of all atoms
else:
res_co.append({"x":float(info[6]),"y":float(info[7]),"z":float(info[8])})
coordinates[int(info[5])]=res_co #key:aa position
file.close()
- info[position1, residue1, position2, residue2, MI, CN]
def read_input():
file=open(input, 'r')
lines=file.readlines()
for line in lines:
text=line.lstrip().rstrip('\n')
info=text.split()
info[0]=int(info[0])
info[2]=int(info[2])
info[4]=float(info[4])
info[5]=float(info[5])
residues.append(info)
file.close()
- writes CN_dist into file
def make_output(a):
file_name="corelations_%s"%(input)
file=open(file_name, 'w')
for i in a:
file.write("%i %s %i %s %f %s\n"%(i[0],i[1],i[2],i[3],i[4],i[5]))
file.close()
if __name__ == '__main__':
read_input()
read_pdb()
all_dist(residues)
make_output(CN_dist)
</source>