Modeller.py

From Bioinformatikpedia

This program can run modeller automatically. Both single template modelling and multiple template modelling are supported.

Usage

Usage: Modeller.py \
	--template template.pdb[,template2.pdb[,template3.pdb]] \
	--chain chain[,chain2,[chain3]]	--target target.pir \
	--align alignment-file.ali [--has-align]

Parameters

  • --template template protein structures (multiple structures are separated by commas)
  • --chain selected chains from template protein structures (multiple templates are separated by commas)
  • --target target sequence in PIR
  • --align file to store sequence alignments
  • --has-align OPTIONAL: whether we have already have alignment file

Program code

<source lang='python'>

  1. !/usr/bin/python

from modeller import * import sys import os.path

def getPIRSeqName(path): f = file(path) header = f.readline().replace('\n', ) f.close() name = header.split(';')[1]; return name

def getTemplateName(template): filename = os.path.basename(template) name = os.path.splitext(filename)[0] return name

def getPDBId(path): f = file(path) header = f.readline().rstrip()[-4:].lower() f.close() return header

(template, target, alnfile, chain, hasAlign) = (None, None, None, None, False)

  1. parse command line

try: for i in range(0, len(sys.argv)): if sys.argv[i] == '--template': template = sys.argv[i+1] elif sys.argv[i] == '--chain': chain = sys.argv[i+1] elif sys.argv[i] == '--target': target = sys.argv[i+1] elif sys.argv[i] == '--align': alnfile = sys.argv[i+1] elif sys.argv[i] == '--has-align': hasAlign = True; if template == None or \ target == None or \ alnfile == None or \ chain == None: raise "Error!" except: print "Homology modelling with Modeller" print Usage: Modeller.py \\ --template template.pdb[,template2.pdb[,template3.pdb]] \\ --chain chain[,chain2,[chain3]] --target target.pir \\ --align alignment-file.ali [--has-align] exit(-1)

targetName = getPIRSeqName(target) templates = template.split(',')

if len(templates) == 1: pdbId = getPDBId(template) templateAlignCodes = pdbId + chain

  1. initialize and perform alignment

env = environ() if not hasAlign: aln = alignment(env) mdl = model(env, \ file=template, \ model_format='PDB', \ model_segment=('FIRST:'+chain,'LAST:'+chain)) aln.append_model(mdl, align_codes=templateAlignCodes, atom_files=template) aln.append(file=target, align_codes=targetName) aln.align2d() aln.check() aln.write(file=alnfile, alignment_format='PIR')

  1. modelling

from modeller.automodel import * a = automodel(env, \ alnfile = alnfile, \ knowns = templateAlignCodes, \ sequence = targetName, \ assess_methods=(assess.DOPE, assess.GA341)) a.starting_model = 1 a.ending_model = 1 a.make() else:

  1. multi-template modelling
  2. prepare MSA

def combineStructuresAndChains(pdbId, chain, templateFile): if templateFile != None: return (pdbId, chain, templateFile) else: return (pdbId, chain) chains = chain.split(',') pdbIDs = [] for template in templates: pdbid = getPDBId(template) pdbIDs.append(pdbid) alignCodes = map(combineStructuresAndChains, pdbIDs, chains, templates)

  1. align template structures

env=environ() aln = alignment(env) for (code, chain, templateFile) in alignCodes: mdl = model(env, file=templateFile, model_segment=\ ('FIRST:'+chain,'LAST:'+chain)) aln.append_model(mdl, \ align_codes=code+chain, \ atom_files=templateFile) aln.salign() aln.write(file=alnfile+'.pap', alignment_format='PAP') aln.write(file=alnfile, alignment_format='PIR')

  1. align target sequence

aln = alignment(env) aln.append(file=alnfile, align_codes='all') aln_block = len(aln) aln.append(file=target, align_codes=targetName) aln.salign() aln.write(file=alnfile, alignment_format='PIR') aln.write(file=alnfile+'.pap', alignment_format='PAP')

  1. build model

from modeller.automodel import * def myJoin(a, b): return (a+b) knowns = map(myJoin, pdbIDs, chains) a = automodel(env, alnfile=alnfile, knowns=knowns, sequence=targetName) a.starting_model = 1 a.ending_model = 1 a.make() </source>