Task 10 - Journal (PKU)

From Bioinformatikpedia
Revision as of 16:21, 29 July 2012 by Boidolj (talk | contribs) (Ramachandran Plot)

Initial checks

Verify completion, frame number and frame rate: <source lang="bash"> gmxcheck -f _md.xtc </source>

performance statistics: <source lang="bash"> tail _md.log </source>

Create Trajectories

<source lang="bash">

  1. fix jumps over the periodic boundary

trjconv -f _md.xtc -s _solv.pdb -pbc nojump -o _traj_nojump.xtc<<EOF0\nEOF

  1. center protein in the box

trjconv -f _traj_nojump.xtc -s _solv.pdb -center -o _traj_center.xtc<<EOF1\n0\nEOF

  1. eliminate rotations

trjconv -f _traj_center.xtc -s _solv.pdb -fit rot+trans -o _traj_norot.xtc<<EOF1\n0\nEOF

  1. create pdb file

trjconv -f _traj_norot.xtc -s _solv.pdb -o _traj.pdb<<EOF1\nEOF

  1. reduce fluctuations and write every fifth frame

g_filter -f _traj.pdb -s _solv.pdb -ol _traj_filtered.pdb -fit -nf 5<<EOF1\nEOF


Plot Energies

<source lang="bash"> echo 12 0 | g_energy -f _md.edr -o _temperature.xvg echo 13 0 | g_energy -f _md.edr -o _pressure.xvg echo -e "9\n10\n11 0" | g_energy -f _md.edr -o _energy.xvg echo 18 0 | g_energy -f _md.edr -o _volume.xvg echo 19 0 | g_energy -f _md.edr -o _density.xvg echo -e "15\n16\n17 0" | g_energy -f _md.edr -o _box.xvg echo -e "48\n50 0" | g_energy -f _md.edr -o _coulomb-inter.xvg echo -e "49\n51 0" | g_energy -f _md.edr -o _vanderwaals-inter.xvg </source>

Minimal Distance of Interactions

<source lang="bash"> echo 1 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_distance.xvg -pi echo 3 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_c_alpha_distance.xvg -pi </source>

RMSD Calculations

<source lang="bash"> echo 1 1|g_rmsf -f _md.xtc -s _md.tpr -o _rmsf-per-residue.xvg -ox _average.pdb -oq _bfactors.pdb -res echo 1 1 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-all-atom-vs-start.xvg echo 4 4 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-backbone-vs-start.xvg echo 1 |trjconv -f _traj_norot.xtc -s _md.tpr -o _protein.xtc echo 1 1 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-all-atom-vs-average.xvg echo 4 4 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-backbone-vs-average.xvg </source>

Radius of Gyration

<source lang="bash"> echo 1 0 |g_gyrate -f _traj_norot.xtc -s _md.tpr -o _radius-of-gyration.xvg echo 1 0 |g_gyrate -f _traj_norot.xtc -s _md.tpr -o _moments_of_inertia.xvg -moi </source>

Accessible Surface

<source lang="bash"> echo 1 1 |g_sas -f _traj_norot.xtc -s _md.tpr -o _solvent-accessible-surface.xvg -oa _atomic-sas.xvg -or _residue-sas.xvg </source>

Hydrogen Bonds

<source lang="bash"> echo 1 1 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-intra-protein.xvg echo 1 12 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-protein-water.xvg </source>

Secondary Structure

<source lang="bash"> wget http://swift.cmbi.ru.nl/gv/dssp/HTML/dsspcmbi chmod +x dsspcmbi export DSSP=$HOME/dsspcmbi echo 1|do_dssp -f _traj_norot.xtc -s _md.tpr -o _secondary-structure.xpm -sc _secondary-structure.xvg -dt 10 xpm2ps -f _secondary-structure.xpm -o _secondary-structure.eps </source>

Ramachandran Plot

<source lang="bash"> g_rama -f _traj_norot.xtc -s _md.tpr -o _ramachandran.xvg </source>

Somewhat nicer and modified from here:

<source lang="python"> rama_GENERAL = "General" rama_GLYCINE = "Glycine" rama_PROLINE = "Proline" rama_PRE_PRO = "Pre-Pro" ramachandran_types = [rama_GENERAL,rama_GLYCINE,rama_PROLINE,rama_PRE_PRO]

  1. I have used the same colours as RAMPAGE
  2. http://raven.bioc.cam.ac.uk/rampage.php

rama_settings = {"General" : ([0, 0.0005, 0.02, 1],

                             # or rama500-general-nosec.data
                "Glycine" : ([0, 0.002,  0.02, 1],
                             # or rama500-gly-sym-nosec.data
                "Proline" : ([0, 0.002,  0.02, 1],
                "Pre-Pro" : ([0, 0.002,  0.02, 1],
                #P.S. Also rama500-ala-nosec.data

import numpy def load_data_file(filename) :

   lower_bounds = range(-180, 180, STEP)
   mid_points = range(-180+HALF_STEP, 180+HALF_STEP, STEP)
   upper_bounds = range(-180+STEP, 180+STEP, STEP)
   data = numpy.array([[0.0 for x in mid_points] for y in mid_points],
   # Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30"
   # Number of dimensions: 2
   # For each dimension, 1 to 2: lower_bound  upper_bound  number_of_bins  wrapping
   #   x1: -180.0 180.0 180 true
   #   x2: -180.0 180.0 180 true
   # List of table coordinates and values. (Value is last number on each line.)
   -179.0 -179.0 0.0918642445114388
   -179.0 -177.0 0.07105717866463215
   input_file = open(filename,"r")
   for line in input_file :
       #Strip the newline character(s) from the end of the line
       if line[-1]=="\n" : line = line[:-1]
       if line[-1]=="\r" : line = line[:-1]
       if line[0]=="#" :
       else :
           parts = line.split()
           assert len(parts)==3
           x1 = float(parts[0]) #phi
           x2 = float(parts[1]) #psi
           value = float(parts[2])
           assert x1 == float(int(x1))
           assert x2 == float(int(x2))
           i1 = mid_points.index(int(x1))
           i2 = mid_points.index(int(x2))
   return (data, lower_bounds, mid_points, upper_bounds)
  1. filename = "stat/rama/rama500-general.data"
  2. data, lower_bounds, mid_points, upper_bounds = load_data_file(filename)
  3. print sum(sum(data))

from rpy import * r.library("MASS")

print "Creating R function", r(""" ramachandran.plot <- function(x.scatter, y.scatter,

   x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
   xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE),
   zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels),
   nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) -
       1), plot.title="", plot.axes, key.title, key.axes, asp = NA,
   xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes,


   if (missing(z.grid)) {
       stop("no 'z.grid' matrix specified")
   else if (is.list(x.grid)) {
       y.grid <- x.grid$y
       x.grid <- x.grid$x
   if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
       stop("increasing 'x.grid' and 'y.grid' values expected")
   plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)
   if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
       stop("no proper 'z.grid' matrix specified")
   if (!is.double(z.grid))
       storage.mode(z.grid) <- "double"
   .Internal(filledcontour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels),
       col = col))
   if (!(missing(x.scatter)) && !(missing(y.scatter))) {
               xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
               type="p", pch=20, cex=0.1)
   if (missing(plot.axes)) {
       if (axes) {
           title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
           axis(1, at=c(-180,-90,0,90,180))
           axis(2, at=c(-180,-90,0,90,180))
   else plot.axes
   if (frame.plot)
   if (missing(plot.title))
   else plot.title

} """) print "Done"

import math def degrees(rad_angle) :

   """Converts and angle in radians to degrees, mapped to the range [-180,180]"""
   angle = rad_angle * 180 / math.pi
   #Note this assume the radians angle is positive as that's what MMTK does
   while angle > 180 :
       angle = angle - 360
   return angle

def next_residue(residue) :

   """Expects an MMTK residue, returns the next residue in the chain, or None"""
   #Proteins go N terminal --> C terminal
   #The next reside is bonded to the C of this atom...
   for a in residue.peptide.C.bondedTo():
       if a.parent.parent != residue:
           return a.parent.parent
   return None

def residue_amino(residue) :

   """Expects an MMTK residue, returns the three letter amino acid code in upper case"""
   if residue :
       return residue.name[0:3].upper()
   else :
       return None

def residue_ramachandran_type(residue) :

   """Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
   if residue_amino(residue)=="GLY" :
       return rama_GLYCINE
   elif residue_amino(residue)=="PRO" :
       return rama_PROLINE
   elif residue_amino(next_residue(residue))=="PRO" :
       #exlcudes those that are Pro or Gly
       return rama_PRE_PRO
   else :
       return rama_GENERAL

scatter_phi = dict() scatter_psi = dict() for ramachandran_type in ramachandran_types :


pdb=sys.argv[1] pdb_filename = "%s.pdb" % pdb

print "Loading PDB file: " + pdb_filename import MMTK.PDB import MMTK.Proteins

  1. protein = MMTK.Proteins.Protein("1HMP.pdb", model="no_hydrogens")
  2. Load the PDB file, ignore the hydrogrens, and then build a model of the peptides:

configuration = MMTK.PDB.PDBConfiguration(pdb_filename) configuration.deleteHydrogens() protein = MMTK.Proteins.Protein(configuration.createPeptideChains(model = "no_hydrogens")) for chain in protein :

   print chain.name
   for residue in chain :
       phi, psi = residue.phiPsi()
       #print residue.name, phi, psi
       if phi and psi :
           ramachandran_type = residue_ramachandran_type(residue)
           assert ramachandran_type in ramachandran_types
       assert len(scatter_phi) == len(scatter_psi)

print "Done"

  1. pdf_filename = "../%s.pdf" % pdb
  2. r.pdf(pdf_filename)

png_filename = "%s.png" % pdb r.png(png_filename)

  1. To get four plots on one page, you could use :
  2. r.split_screen([2,2]) #split into two by two screen
  3. Or:
  4. r.layout(Numeric.array([[1,2],[3,4]]), respect=True)
  5. But I went for simply:


for (i,ramachandran_type) in enumerate(ramachandran_types) :

   #pdf_filename = "../%s_%s.pdf" % (pdb, ramachandran_type)
   (rama_levels, rama_colors, rama_filename) = rama_settings[ramachandran_type]
   print "Loading data file: " + rama_filename,
   data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)
   print "Done"
   #print "Creating PDF output file: " + pdf_filename,
   #r.plot(scatter_phi, scatter_psi)
   print "Generating quadrant %i, %s" % (i+1, ramachandran_type)
   #Use small margins to make the plots nice and big,
   #and specify a SQUARE plot area (to go with aspect ratio, asp=1)
   r.par(mar = [2, 2, 2, 2], pty="s")
   #This function will do a Ramachandran plot in the next quadrant
   #which we setup using par(mfrow-...)
                       x_grid=mid_points, y_grid=mid_points, z_grid=data,
                       xlim=[-180,180], ylim=[-180,180], asp=1.0,
                       plot_title=ramachandran_type, drawlabels=False,
                       levels=rama_levels, col=rama_colors)
   print ramachandran_type + " Done"

r.dev_off() print "Done" </source>

more RMSDs

<source lang="bash"> echo -e "1\n1\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix.xpm -dt 10 echo -e "6\n6\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix_chain-c_b.xpm -dt 10 xpm2ps -f _rmsd-matrix.xpm -o _rmsd-matrix.eps -rainbow blue xpm2ps -f _rmsd-matrix_chain-c_b.xpm -o _rmsd-matrix_chain-c_b.eps -rainbow blue </source>

Cluster Analysis

<source lang="bash"> echo 6 6 | g_cluster -s _md.tpr -f _traj_norot.xtc -dm _rmsd-matrix_chain-c_b.xpm -dist _rmsd-distribution.xvg -o _clusters.xpm -sz _cluster-sizes.xvg -tr _cluster-transitions.xpm -ntr _cluster-transitions.xvg -clid _cluster-id-over-time.xvg -cl _clusters.pdb -cutoff 0.1 -method gromos -dt 10 </source>

Distance RMSD

<source lang="bash"> echo 1|g_rmsdist -s _md.tpr -f _traj_norot.xtc -o _distance-rmsd.xvg </source>