Difference between revisions of "Script calc hotspot.py"

From Bioinformatikpedia
(Created page with "<source lang="python"> #!/usr/bin/python import sys import os import re import numpy import math import operator input=sys.argv[1]#top_score_couplings.out file only contains th…")
 
 
Line 9: Line 9:
 
import operator
 
import operator
   
input=sys.argv[1]#top_score_couplings.out file only contains the sorted best L hits
+
input=sys.argv[1]#input file formatted like freecontact output , but file must only contain the sorted best L hits!!!!!!!!!!!
 
number=0
 
number=0
   

Latest revision as of 21:09, 3 September 2013

<source lang="python">

  1. !/usr/bin/python

import sys import os import re import numpy import math import operator

input=sys.argv[1]#input file formatted like freecontact output , but file must only contain the sorted best L hits!!!!!!!!!!! number=0

def calc_scores(pairs):
    """Sum the coupling score of every residue over all given pairs.

    pairs -- iterable of records indexed like
             [position1, residue1, position2, residue2, MI, CN];
             only indices 0-3 and 5 are read here.

    Returns a dict mapping "position:residue" keys to the sum of the
    index-5 scores of all pairs the residue takes part in.
    """
    res_scores = {}
    for p in pairs:
        score = p[5]
        # Both partners of a pair are credited with the pair's score.
        partners = ("%i:%s" % (p[0], p[1]), "%i:%s" % (p[2], p[3]))
        for key in partners:
            # Accumulate with a membership test, which works on both
            # Python 2 and Python 3 dicts.
            if key in res_scores:
                res_scores[key] = res_scores[key] + score
            else:
                res_scores[key] = score
    return res_scores

def add_score(r, score, dict):
    """Return the running total for key r after adding score.

    r     -- key to look up (callers in this script pass "position:residue").
    score -- value to add.
    dict  -- mapping of keys to their totals so far; it is not modified.
             (The parameter name shadows the builtin but is kept so that
             keyword callers continue to work.)

    Returns dict[r] + score when r is already present, otherwise score.
    """
    # The `in` operator replaces dict.has_key(), which no longer exists in
    # Python 3; `in` works on Python 2 as well.
    if r in dict:
        return dict[r] + score
    return score


# info[position1, residue1, position2, residue2, MI, CN]

def read_input():
    """Parse the file named by the module-level `input` path.

    Each non-blank line is split on whitespace into a record; fields 0 and 2
    are converted to int and fields 4 and 5 to float, the rest stay strings.

    Returns a list of records, one list per non-blank line, in file order.
    Raises ValueError if a numeric field cannot be converted and IndexError
    if a non-blank line has fewer than six fields.
    """
    residues = []
    # The with-block closes the file even if a line fails to parse.
    with open(input, 'r') as handle:
        for line in handle:
            # split() with no arguments already discards leading/trailing
            # whitespace, including the newline.
            info = line.split()
            if not info:
                # Skip blank lines (e.g. an empty line at the end of the file)
                # instead of failing on info[0].
                continue
            info[0] = int(info[0])
            info[2] = int(info[2])
            info[4] = float(info[4])
            info[5] = float(info[5])
            residues.append(info)
    return residues


def make_output(top):
    """Print one "key score" line per entry of top, lowest score first.

    top -- dict mapping a key (formatted with %s) to a numeric score
           (formatted with %f).
    """
    # items() is available on both Python 2 and Python 3 dicts, whereas
    # iteritems() exists only on Python 2.
    for residue, score in sorted(top.items(), key=operator.itemgetter(1)):
        print("%s %f" % (residue, score))

# calculates the normalisation parameter of L scores

def norm_para(l):
    """Return the normalisation parameter: the mean of the index-5 score.

    l -- sequence of records whose element at index 5 is the score.
    """
    return numpy.mean([record[5] for record in l])
# normalizes each score by dividing it by the average (para)

def normalize_values(para, scores):
    """Divide every value of scores by para, in place.

    para   -- divisor applied to each value.
    scores -- dict of key -> score; it is modified and also returned.
    """
    # Iterate over a snapshot so the dict can be reassigned while looping.
    for residue, total in list(scores.items()):
        scores[residue] = total / para
    return scores


if __name__ == '__main__':
    # Pipeline: parse the pair records, sum the scores per residue, divide
    # the sums by the mean pair score, then print them sorted by score.
    pair_records = read_input()
    residue_totals = calc_scores(pair_records)
    mean_pair_score = norm_para(pair_records)
    make_output(normalize_values(mean_pair_score, residue_totals))

</source>