Script rank contacts.py

From Bioinformatikpedia
Revision as of 21:33, 3 September 2013 by Gerkej (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

This program, rank_contacts.py, removes all contacts between direct neighbours (sequence distance <= 5 residues) and sorts the remaining contacts by their CN score in descending order.

<source lang="python">

#!/usr/bin/python

import sys
import os
import re
import numpy
import math


# Path of the input file, taken from the first command-line argument
# (e.g. contact.out as generated by freecontact).
input=sys.argv[1]

# Accumulates one parsed record per contact; read_input() appends lists of the
# form [position1, residue1, position2, residue2, MI, CN].
residues=[]

# info[position1, residue1, position2, residue2, MI, CN]

def read_input():
    """Parse the contact file named by the global ``input`` into ``residues``.

    Each non-blank line is split on whitespace. Fields 0 and 2 (the two
    positions) are converted to int and fields 4 and 5 (the two scores)
    to float; the remaining fields are kept as strings. The resulting
    list is appended to the module-level ``residues`` list.

    Blank or whitespace-only lines are skipped. Lines with too few fields
    or non-numeric values still raise (IndexError / ValueError), so
    malformed input is not silently ignored.
    """
    # The context manager closes the file even if a line fails to parse.
    with open(input, 'r') as handle:
        # Iterate lazily instead of loading the whole file with readlines().
        for line in handle:
            info = line.split()
            if not info:
                # A trailing or stray empty line would otherwise crash on info[0].
                continue
            info[0] = int(info[0])
            info[2] = int(info[2])
            info[4] = float(info[4])
            info[5] = float(info[5])
            residues.append(info)
# writes the sorted residues to a file

def make_output(a):
    """Write the contact records in ``a`` to ``filtered_<input file name>``.

    a: iterable of records indexable as
       [position1, residue1, position2, residue2, score, score]; each is
       written as one space-separated line ("%i %s %i %s %f %f").

    The output file is created in the same directory as the global
    ``input`` path. The "filtered_" prefix is applied to the file name
    only, so an input such as "dir/contact.out" yields
    "dir/filtered_contact.out" rather than the unusable
    "filtered_dir/contact.out". For a bare file name the result is
    unchanged ("filtered_contact.out" in the current directory).
    """
    # Imported locally so the function carries its own dependency.
    import os

    directory, base_name = os.path.split(input)
    file_name = os.path.join(directory, "filtered_%s" % base_name)
    # The context manager flushes and closes the file even if a write fails.
    with open(file_name, 'w') as out_file:
        for row in a:
            out_file.write("%i %s %i %s %f %f\n" % (row[0], row[1], row[2], row[3], row[4], row[5]))
# removes all contacts with a position difference <= 5

def remove_distances(a, threshold=5):
    """Return the contacts whose two positions are more than ``threshold`` apart.

    a: iterable of records with the first position at index 0 and the
       second position at index 2.
    threshold: largest absolute position difference that is still
       discarded. The default of 5 reproduces the previously hard-coded
       cut-off, so existing one-argument calls behave as before.

    Returns a new list in the original order; ``a`` itself is not modified.
    """
    return [contact for contact in a
            if abs(contact[0] - contact[2]) > threshold]


if __name__ == '__main__':
    # Pipeline: parse the input file, drop contacts between near neighbours,
    # rank what is left and write the result.
    read_input()
    ranked = remove_distances(residues)
    # Index 5 is the score used for ranking (CN); highest score first.
    ranked.sort(key=lambda contact: contact[5], reverse=True)
    make_output(ranked)
      

</source>