Script calc hotspot.py
From Bioinformatikpedia
<source lang="python">
#!/usr/bin/python
import math
import operator
import os
import re
import sys

import numpy

# Path of the top_score_couplings.out file given as the first command-line
# argument; that file only contains the sorted best L hits.
input = sys.argv[1]
# NOTE(review): not read by any function visible in this file — confirm
# whether it is still needed.
number = 0
def calc_scores(pairs):
    """Accumulate the score of every pair onto both residues it involves.

    Each element of ``pairs`` is indexed positionally: items 0/1 and 2/3 are
    the position and residue of the two partners, and item 5 is the score
    that gets added (the CN column according to the file's own comment).
    Residues are keyed as ``"<position>:<residue>"``.

    Returns a dict mapping each residue key to its summed score.
    """
    totals = {}
    for pair in pairs:
        first_key = "%i:%s" % (pair[0], pair[1])
        second_key = "%i:%s" % (pair[2], pair[3])
        coupling = pair[5]
        # Both partners of a pair receive the full pair score.
        for key in (first_key, second_key):
            totals[key] = add_score(key, coupling, totals)
    return totals
def add_score(r, score, dict):
    """Return the new running total for key ``r`` after adding ``score``.

    Parameters:
        r: key to look up (a residue identifier string in this script).
        score: value to add.
        dict: mapping of keys to their totals so far; it is not modified.

    Returns:
        ``dict[r] + score`` when ``r`` is already present, otherwise
        ``score`` unchanged.
    """
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    if r in dict:
        return dict[r] + score
    return score
# info = [position1, residue1, position2, residue2, MI, CN]
def read_input(path=None):
    """Parse the couplings file into a list of per-pair records.

    Every non-blank line is split on whitespace; fields 0 and 2 are converted
    to ``int`` and fields 4 and 5 to ``float``. The remaining fields stay
    strings.

    Parameters:
        path: file to read. When omitted, the module-level ``input`` (the
            file name taken from the command line) is used, as before.

    Returns:
        A list of lists, one per parsed line, in file order.

    Raises:
        IndexError / ValueError: if a non-blank line has fewer than six
            fields or a numeric field cannot be converted.
    """
    if path is None:
        path = input
    residues = []
    # The context manager closes the file even if a conversion fails.
    with open(path, 'r') as handle:
        for line in handle:
            info = line.split()
            if not info:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            info[0] = int(info[0])
            info[2] = int(info[2])
            info[4] = float(info[4])
            info[5] = float(info[5])
            residues.append(info)
    return residues
def make_output(top):
    """Print every entry of ``top`` as ``"<key> <value>"``, lowest value first.

    Parameters:
        top: mapping of keys to numeric scores.

    Values are formatted with ``%f``, i.e. six decimal places.
    """
    # .items() replaces dict.iteritems(), which no longer exists in Python 3.
    top_sorted = sorted(top.items(), key=operator.itemgetter(1))
    for key, value in top_sorted:
        print("%s %f" % (key, value))
# Calculates the normalisation parameter (the mean) of the L scores.
def norm_para(l):
    """Return the arithmetic mean of the item at index 5 of every record in ``l``.

    The result is whatever ``numpy.mean`` returns for the collected values.
    """
    pair_scores = [record[5] for record in l]
    return numpy.mean(pair_scores)
# Normalizes every value by dividing it by the average (para).
def normalize_values(para, scores):
    """Divide every value in ``scores`` by ``para``.

    The mapping is updated in place and the same object is returned.
    """
    # Snapshot the items so the loop does not depend on a live view.
    for key, raw in list(scores.items()):
        scores[key] = raw / para
    return scores
if __name__ == '__main__':
    # Pipeline: parse the pairs, sum scores per residue, scale the sums by
    # the mean pair score, then print the residues in ascending order.
    res_pairs = read_input()
    scores = calc_scores(res_pairs)
    normalizer = norm_para(res_pairs)
    top_hits = normalize_values(normalizer, scores)
    make_output(top_hits)
</source>