Difference between revisions of "Task 10 - Journal (PKU)"

From Bioinformatikpedia
(Secondary Structure)
(Distance RMSD)
 
(14 intermediate revisions by the same user not shown)
Line 47: Line 47:
   
 
<source lang="bash">
 
<source lang="bash">
echo -e "1\n1 0"|g_rmsf -f _md.xtc -s _md.tpr -o _rmsf-per-residue.xvg -ox _average.pdb -oq _bfactors.pdb -res
+
echo 1 1|g_rmsf -f _md.xtc -s _md.tpr -o _rmsf-per-residue.xvg -ox _average.pdb -oq _bfactors.pdb -res
echo 1 0 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-all-atom-vs-start.xvg
+
echo 1 1 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-all-atom-vs-start.xvg
echo 4 0 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-backbone-vs-start.xvg
+
echo 4 4 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-backbone-vs-start.xvg
 
echo 1 |trjconv -f _traj_norot.xtc -s _md.tpr -o _protein.xtc
 
echo 1 |trjconv -f _traj_norot.xtc -s _md.tpr -o _protein.xtc
echo -e "1\n1 0" |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-all-atom-vs-average.xvg
+
echo 1 1 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-all-atom-vs-average.xvg
echo -e "4\n4 0" |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-backbone-vs-average.xvg
+
echo 4 4 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-backbone-vs-average.xvg
  +
</source>
  +
  +
== Difference of RMSF ==
  +
  +
distributed by Marc:
  +
<source lang="perl">
  +
#!/usr/bin/perl -w
  +
$swExit = 0;
  +
  +
if(@ARGV < 2){
  +
print STDERR "statTD.pl <RMSF FILE1> <RMSF FILE2> [START] [END]\n";
  +
exit;
  +
}
  +
else{
  +
if(! -e "$ARGV[0]"){
  +
print STDERR "File $ARGV[0] does not exist!\n";
  +
$swExit = 1;
  +
}
  +
if(! -e "$ARGV[1]"){
  +
print STDERR "File $ARGV[1] does not exist!\n";
  +
$swExit = 1;
  +
}
  +
}
  +
if(defined $ARGV[2]){
  +
  +
if(not defined $ARGV[3]){
  +
print STDERR "END value missing!\n";
  +
$swExit = 1;
  +
}
  +
elsif($ARGV[2] !~ m/[0-9]+/ || $ARGV[3] !~ m/[0-9]+/){
  +
print STDERR "Problem with the ranges please check!\n";
  +
$swExit = 1;
  +
}
  +
}
  +
if($swExit){exit;}
  +
if(defined $ARGV[2] && defined $ARGV[3] && $ARGV[2] > $ARGV[3]){
  +
$tmp = $ARGV[2];
  +
$ARGV[2] = $ARGV[3];
  +
$ARGV[3] = $tmp;
  +
}
  +
  +
  +
open(FILE,$ARGV[0]);
  +
while(<FILE>){
  +
chomp($_);
  +
if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){
  +
#print "$1 $2\t";
  +
if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]){
  +
$in1{$1} = $2;
  +
#print "1\n";
  +
}
  +
elsif(not defined $ARGV[2]){
  +
$in1{$1} = $2;
  +
#print "$ARGV[2] $ARGV[3] 3\n";
  +
}
  +
  +
  +
}
  +
}
  +
close FILE;
  +
open(FILE,$ARGV[1]);
  +
while(<FILE>){
  +
chomp($_);
  +
if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){
  +
#print "$1 $2\n";
  +
if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]) {
  +
$in2{$1} = $2;
  +
}
  +
elsif(not defined $ARGV[2]){
  +
$in2{$1} = $2;
  +
}
  +
}
  +
}
  +
close FILE;
  +
if((keys %in1) != (keys %in2)){
  +
print STDERR "Number of datapoints not identical!\n";
  +
exit;
  +
}
  +
  +
$value = &getPV(\%in1,\%in2);
  +
  +
print "$value";
  +
if($value <= 0.001){
  +
print "**\n";
  +
}
  +
elsif($value <= 0.05){
  +
print "*\n";
  +
}
  +
else{
  +
print "-\n";
  +
}
  +
sub getPV{
  +
my (%data1) = %{$_[0]};
  +
my (%data2) = %{$_[1]};
  +
my $avg = 0;
  +
my $cnt = 0;
  +
my $std = 0;
  +
my $ste = 0;
  +
my $ratio = 0;
  +
  +
foreach $key (sort keys %data1){
  +
#print "$key\n";
  +
$avg += ($data1{$key} - $data2{$key});
  +
$cnt++;
  +
}
  +
$avg /= $cnt;
  +
foreach $key (sort keys %data1){
  +
$std += (($data1{$key} - $data2{$key}) - $avg)**2;
  +
}
  +
$std /= $cnt;
  +
$std = sqrt($std);
  +
$ste = $std/sqrt($cnt);
  +
$ratio = abs($avg/$ste);
  +
open(OUT,">/tmp/rscript.tmp");
  +
print OUT "ratio <- c($ratio);\n";
  +
print OUT "dfree <- c($cnt);\n";
  +
print OUT "dt(ratio,dfree);\n";
  +
close OUT;
  +
$value = `Rscript /tmp/rscript.tmp`;
  +
chomp($value);
  +
`rm -f /tmp/rscript.tmp`;
  +
return (split/\s+/,$value)[1];
  +
}
  +
 
</source>
 
</source>
   
Line 72: Line 196:
 
</source>
 
</source>
   
== Saltbridges ==
+
== Secondary Structure ==
 
<source lang="bash">
 
<source lang="bash">
  +
wget http://swift.cmbi.ru.nl/gv/dssp/HTML/dsspcmbi
g_saltbr -f _traj_norot.xtc -s _md.tpr -t 0.5 -sep
 
  +
chmod +x dsspcmbi
  +
export DSSP=$HOME/dsspcmbi
  +
echo 1|do_dssp -f _traj_norot.xtc -s _md.tpr -o _secondary-structure.xpm -sc _secondary-structure.xvg -dt 10
  +
xpm2ps -f _secondary-structure.xpm -o _secondary-structure.eps
 
</source>
 
</source>
 
== Secondary Structure ==
 
 
not yet
 
   
 
== Ramachandran Plot ==
 
== Ramachandran Plot ==
 
<source lang="bash">
 
<source lang="bash">
 
g_rama -f _traj_norot.xtc -s _md.tpr -o _ramachandran.xvg
 
g_rama -f _traj_norot.xtc -s _md.tpr -o _ramachandran.xvg
  +
</source>
  +
  +
Somewhat nicer and modified from [http://www2.warwick.ac.uk/fac/sci/moac/people/students/peter_cock/python/ramachandran/drawing/ here]. Extract the desired time steps in the traj_filtered.pdb with your favorite method, clean residues unknown to the script and use the script below with the pdb-file as parameter. Or use the whole trajectory-pdb, if you have the memory and want a nice black page. :-) Requires numpy, rpy (modules available in the ubuntu repositories) and MMTK ([https://sourcesup.renater.fr/projects/mmtk/ download]).
  +
<source lang="bash">
  +
python ramachandran.py <time-step>
  +
</source>
  +
  +
<source lang="python">
  +
#!/usr/bin/env python
  +
#usage: ramachandran.py <pdb-file>
  +
rama_GENERAL = "General"
  +
rama_GLYCINE = "Glycine"
  +
rama_PROLINE = "Proline"
  +
rama_PRE_PRO = "Pre-Pro"
  +
ramachandran_types = [rama_GENERAL,rama_GLYCINE,rama_PROLINE,rama_PRE_PRO]
  +
  +
# I have used the same colours as RAMPAGE
  +
# http://raven.bioc.cam.ac.uk/rampage.php
  +
rama_settings = {"General" : ([0, 0.0005, 0.02, 1],
  +
['#FFFFFF','#B3E8FF','#7FD9FF'],
  +
"pct/rama/rama500-general.data"),
  +
# or rama500-general-nosec.data
  +
"Glycine" : ([0, 0.002, 0.02, 1],
  +
['#FFFFFF','#FFE8C5','#FFCC7F'],
  +
"pct/rama/rama500-gly-sym.data"),
  +
# or rama500-gly-sym-nosec.data
  +
"Proline" : ([0, 0.002, 0.02, 1],
  +
['#FFFFFF','#D0FFC5','#7FFF8C'],
  +
"pct/rama/rama500-pro.data"),
  +
"Pre-Pro" : ([0, 0.002, 0.02, 1],
  +
['#FFFFFF','#B3E8FF','#7FD9FF'],
  +
"pct/rama/rama500-prepro.data")}
  +
#P.S. Also rama500-ala-nosec.data
  +
  +
import numpy
  +
def load_data_file(filename) :
  +
STEP=2
  +
HALF_STEP=1
  +
STEP = HALF_STEP*2
  +
lower_bounds = range(-180, 180, STEP)
  +
mid_points = range(-180+HALF_STEP, 180+HALF_STEP, STEP)
  +
upper_bounds = range(-180+STEP, 180+STEP, STEP)
  +
  +
data = numpy.array([[0.0 for x in mid_points] for y in mid_points],
  +
dtype=numpy.float)
  +
  +
"""
  +
# Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30"
  +
# Number of dimensions: 2
  +
# For each dimension, 1 to 2: lower_bound upper_bound number_of_bins wrapping
  +
# x1: -180.0 180.0 180 true
  +
# x2: -180.0 180.0 180 true
  +
# List of table coordinates and values. (Value is last number on each line.)
  +
-179.0 -179.0 0.0918642445114388
  +
-179.0 -177.0 0.07105717866463215
  +
...
  +
"""
  +
input_file = open(filename,"r")
  +
for line in input_file :
  +
#Strip the newline character(s) from the end of the line
  +
if line[-1]=="\n" : line = line[:-1]
  +
if line[-1]=="\r" : line = line[:-1]
  +
if line[0]=="#" :
  +
#comment
  +
pass
  +
else :
  +
#data
  +
parts = line.split()
  +
assert len(parts)==3
  +
  +
x1 = float(parts[0]) #phi
  +
x2 = float(parts[1]) #psi
  +
value = float(parts[2])
  +
  +
assert x1 == float(int(x1))
  +
assert x2 == float(int(x2))
  +
i1 = mid_points.index(int(x1))
  +
i2 = mid_points.index(int(x2))
  +
  +
data[i1,i2]=value
  +
input_file.close()
  +
return (data, lower_bounds, mid_points, upper_bounds)
  +
  +
#filename = "stat/rama/rama500-general.data"
  +
#data, lower_bounds, mid_points, upper_bounds = load_data_file(filename)
  +
#print sum(sum(data))
  +
  +
from rpy import *
  +
r.library("MASS")
  +
  +
print "Creating R function",
  +
r("""
  +
ramachandran.plot <- function(x.scatter, y.scatter,
  +
x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
  +
xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE),
  +
zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels),
  +
nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) -
  +
1), plot.title="", plot.axes, key.title, key.axes, asp = NA,
  +
xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes,
  +
...)
  +
{
  +
if (missing(z.grid)) {
  +
stop("no 'z.grid' matrix specified")
  +
}
  +
else if (is.list(x.grid)) {
  +
y.grid <- x.grid$y
  +
x.grid <- x.grid$x
  +
}
  +
if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
  +
stop("increasing 'x.grid' and 'y.grid' values expected")
  +
  +
plot.new()
  +
plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)
  +
  +
if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
  +
stop("no proper 'z.grid' matrix specified")
  +
if (!is.double(z.grid))
  +
storage.mode(z.grid) <- "double"
  +
.Internal(filledcontour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels),
  +
col = col))
  +
  +
if (!(missing(x.scatter)) && !(missing(y.scatter))) {
  +
plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL),
  +
xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
  +
type="p", pch=20, cex=0.1)
  +
}
  +
  +
if (missing(plot.axes)) {
  +
if (axes) {
  +
title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
  +
axis(1, at=c(-180,-90,0,90,180))
  +
axis(2, at=c(-180,-90,0,90,180))
  +
}
  +
}
  +
else plot.axes
  +
if (frame.plot)
  +
box()
  +
if (missing(plot.title))
  +
title(...)
  +
else plot.title
  +
invisible()
  +
}
  +
""")
  +
print "Done"
  +
  +
  +
import math
  +
def degrees(rad_angle) :
  +
"""Converts and angle in radians to degrees, mapped to the range [-180,180]"""
  +
angle = rad_angle * 180 / math.pi
  +
#Note this assume the radians angle is positive as that's what MMTK does
  +
while angle > 180 :
  +
angle = angle - 360
  +
return angle
  +
  +
def next_residue(residue) :
  +
"""Expects an MMTK residue, returns the next residue in the chain, or None"""
  +
#Proteins go N terminal --> C terminal
  +
#The next reside is bonded to the C of this atom...
  +
for a in residue.peptide.C.bondedTo():
  +
if a.parent.parent != residue:
  +
return a.parent.parent
  +
return None
  +
  +
  +
def residue_amino(residue) :
  +
"""Expects an MMTK residue, returns the three letter amino acid code in upper case"""
  +
if residue :
  +
return residue.name[0:3].upper()
  +
else :
  +
return None
  +
  +
def residue_ramachandran_type(residue) :
  +
"""Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
  +
if residue_amino(residue)=="GLY" :
  +
return rama_GLYCINE
  +
elif residue_amino(residue)=="PRO" :
  +
return rama_PROLINE
  +
elif residue_amino(next_residue(residue))=="PRO" :
  +
#exlcudes those that are Pro or Gly
  +
return rama_PRE_PRO
  +
else :
  +
return rama_GENERAL
  +
  +
scatter_phi = dict()
  +
scatter_psi = dict()
  +
for ramachandran_type in ramachandran_types :
  +
scatter_phi[ramachandran_type]=[]
  +
scatter_psi[ramachandran_type]=[]
  +
  +
  +
pdb=sys.argv[1]
  +
pdb_filename = "%s.pdb" % pdb
  +
  +
print "Loading PDB file: " + pdb_filename
  +
import MMTK.PDB
  +
import MMTK.Proteins
  +
#protein = MMTK.Proteins.Protein("1HMP.pdb", model="no_hydrogens")
  +
# Load the PDB file, ignore the hydrogrens, and then build a model of the peptides:
  +
configuration = MMTK.PDB.PDBConfiguration(pdb_filename)
  +
configuration.deleteHydrogens()
  +
protein = MMTK.Proteins.Protein(configuration.createPeptideChains(model = "no_hydrogens"))
  +
for chain in protein :
  +
print chain.name
  +
for residue in chain :
  +
phi, psi = residue.phiPsi()
  +
#print residue.name, phi, psi
  +
if phi and psi :
  +
ramachandran_type = residue_ramachandran_type(residue)
  +
assert ramachandran_type in ramachandran_types
  +
scatter_phi[ramachandran_type].append(degrees(phi))
  +
scatter_psi[ramachandran_type].append(degrees(psi))
  +
assert len(scatter_phi) == len(scatter_psi)
  +
  +
print "Done"
  +
  +
#pdf_filename = "../%s.pdf" % pdb
  +
#r.pdf(pdf_filename)
  +
  +
png_filename = "%s.png" % pdb
  +
r.png(png_filename)
  +
  +
#To get four plots on one page, you could use :
  +
#
  +
#r.split_screen([2,2]) #split into two by two screen
  +
#
  +
#Or:
  +
#
  +
#r.layout(Numeric.array([[1,2],[3,4]]), respect=True)
  +
#
  +
#But I went for simply:
  +
  +
r.par(mfrow=[2,2])
  +
  +
for (i,ramachandran_type) in enumerate(ramachandran_types) :
  +
#pdf_filename = "../%s_%s.pdf" % (pdb, ramachandran_type)
  +
(rama_levels, rama_colors, rama_filename) = rama_settings[ramachandran_type]
  +
  +
print "Loading data file: " + rama_filename,
  +
data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)
  +
print "Done"
  +
  +
#print "Creating PDF output file: " + pdf_filename,
  +
#r.pdf(pdf_filename)
  +
#r.plot(scatter_phi, scatter_psi)
  +
  +
print "Generating quadrant %i, %s" % (i+1, ramachandran_type)
  +
#r.screen(i+1)
  +
  +
#Use small margins to make the plots nice and big,
  +
#and specify a SQUARE plot area (to go with aspect ratio, asp=1)
  +
r.par(mar = [2, 2, 2, 2], pty="s")
  +
  +
#This function will do a Ramachandran plot in the next quadrant
  +
#which we setup using par(mfrow-...)
  +
r.ramachandran_plot(x_scatter=scatter_phi[ramachandran_type],
  +
y_scatter=scatter_psi[ramachandran_type],
  +
x_grid=mid_points, y_grid=mid_points, z_grid=data,
  +
xlim=[-180,180], ylim=[-180,180], asp=1.0,
  +
plot_title=ramachandran_type, drawlabels=False,
  +
levels=rama_levels, col=rama_colors)
  +
print ramachandran_type + " Done"
  +
  +
r.dev_off()
  +
print "Done"
 
</source>
 
</source>
   
Line 96: Line 485:
 
== Cluster Analysis ==
 
== Cluster Analysis ==
 
<source lang="bash">
 
<source lang="bash">
echo 6 6 | g_cluster -s _md.tpr -f _traj_norot.xtc -dm _rmsd-matrix_chain_c_b.xpm -dist _rmsd-distribution.xvg -o _clusters.xpm -sz _cluster-sizes.xvg -tr _cluster-transitions.xpm -ntr _cluster-transitions.xvg -clid _cluster-id-over-time.xvg -cl _clusters.pdb -cutoff 0.1 -method gromos -dt 10
+
echo 6 6 | g_cluster -s _md.tpr -f _traj_norot.xtc -dm _rmsd-matrix_chain-c_b.xpm -dist _rmsd-distribution.xvg -o _clusters.xpm -sz _cluster-sizes.xvg -tr _cluster-transitions.xpm -ntr _cluster-transitions.xvg -clid _cluster-id-over-time.xvg -cl _clusters.pdb -cutoff 0.1 -method gromos -dt 10
 
</source>
 
</source>
   
Line 103: Line 492:
 
echo 1|g_rmsdist -s _md.tpr -f _traj_norot.xtc -o _distance-rmsd.xvg
 
echo 1|g_rmsdist -s _md.tpr -f _traj_norot.xtc -o _distance-rmsd.xvg
 
</source>
 
</source>
  +
  +
[[Category: Phenylketonuria 2012]]

Latest revision as of 11:58, 29 August 2012

Initial checks

Verify completion, frame number and frame rate: <source lang="bash"> gmxcheck -f _md.xtc </source>

performance statistics: <source lang="bash"> tail _md.log </source>

Create Trajectories

<source lang="bash">

  1. fix jumps over the periodic boundary

trjconv -f _md.xtc -s _solv.pdb -pbc nojump -o _traj_nojump.xtc<<EOF0\nEOF

  1. center protein in the box

trjconv -f _traj_nojump.xtc -s _solv.pdb -center -o _traj_center.xtc<<EOF1\n0\nEOF

  1. eliminate rotations

trjconv -f _traj_center.xtc -s _solv.pdb -fit rot+trans -o _traj_norot.xtc<<EOF1\n0\nEOF

  1. create pdb file

trjconv -f _traj_norot.xtc -s _solv.pdb -o _traj.pdb<<EOF1\nEOF

  1. reduce fluctuations and write every fifth frame

g_filter -f _traj.pdb -s _solv.pdb -ol _traj_filtered.pdb -fit -nf 5<<EOF1\nEOF

</source>

Plot Energies

<source lang="bash"> echo 12 0 | g_energy -f _md.edr -o _temperature.xvg echo 13 0 | g_energy -f _md.edr -o _pressure.xvg echo -e "9\n10\n11 0" | g_energy -f _md.edr -o _energy.xvg echo 18 0 | g_energy -f _md.edr -o _volume.xvg echo 19 0 | g_energy -f _md.edr -o _density.xvg echo -e "15\n16\n17 0" | g_energy -f _md.edr -o _box.xvg echo -e "48\n50 0" | g_energy -f _md.edr -o _coulomb-inter.xvg echo -e "49\n51 0" | g_energy -f _md.edr -o _vanderwaals-inter.xvg </source>

Minimal Distance of Interactions

<source lang="bash"> echo 1 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_distance.xvg -pi echo 3 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_c_alpha_distance.xvg -pi </source>

RMSD Calculations

<source lang="bash"> echo 1 1|g_rmsf -f _md.xtc -s _md.tpr -o _rmsf-per-residue.xvg -ox _average.pdb -oq _bfactors.pdb -res echo 1 1 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-all-atom-vs-start.xvg echo 4 4 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-backbone-vs-start.xvg echo 1 |trjconv -f _traj_norot.xtc -s _md.tpr -o _protein.xtc echo 1 1 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-all-atom-vs-average.xvg echo 4 4 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-backbone-vs-average.xvg </source>

Difference of RMSF

distributed by Marc: <source lang="perl">

  1. !/usr/bin/perl -w

$swExit = 0;

if(@ARGV < 2){ print STDERR "statTD.pl <RMSF FILE1> <RMSF FILE2> [START] [END]\n"; exit; } else{ if(! -e "$ARGV[0]"){ print STDERR "File $ARGV[0] does not exist!\n"; $swExit = 1; } if(! -e "$ARGV[1]"){ print STDERR "File $ARGV[1] does not exist!\n"; $swExit = 1; } } if(defined $ARGV[2]){

if(not defined $ARGV[3]){ print STDERR "END value missing!\n"; $swExit = 1; } elsif($ARGV[2] !~ m/[0-9]+/ || $ARGV[3] !~ m/[0-9]+/){ print STDERR "Problem with the ranges please check!\n"; $swExit = 1; } } if($swExit){exit;} if(defined $ARGV[2] && defined $ARGV[3] && $ARGV[2] > $ARGV[3]){ $tmp = $ARGV[2]; $ARGV[2] = $ARGV[3]; $ARGV[3] = $tmp; }


open(FILE,$ARGV[0]); while(<FILE>){ chomp($_); if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){ #print "$1 $2\t"; if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]){ $in1{$1} = $2; #print "1\n"; } elsif(not defined $ARGV[2]){ $in1{$1} = $2; #print "$ARGV[2] $ARGV[3] 3\n"; }


} } close FILE; open(FILE,$ARGV[1]); while(<FILE>){ chomp($_); if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){ #print "$1 $2\n"; if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]) { $in2{$1} = $2; } elsif(not defined $ARGV[2]){ $in2{$1} = $2; } } } close FILE; if((keys %in1) != (keys %in2)){ print STDERR "Number of datapoints not identical!\n"; exit; }

$value = &getPV(\%in1,\%in2);

print "$value"; if($value <= 0.001){ print "**\n"; } elsif($value <= 0.05){ print "*\n"; } else{ print "-\n"; } sub getPV{ my (%data1) = %{$_[0]}; my (%data2) = %{$_[1]}; my $avg = 0; my $cnt = 0; my $std = 0; my $ste = 0; my $ratio = 0;

foreach $key (sort keys %data1){ #print "$key\n"; $avg += ($data1{$key} - $data2{$key}); $cnt++; } $avg /= $cnt; foreach $key (sort keys %data1){ $std += (($data1{$key} - $data2{$key}) - $avg)**2; } $std /= $cnt; $std = sqrt($std); $ste = $std/sqrt($cnt); $ratio = abs($avg/$ste); open(OUT,">/tmp/rscript.tmp"); print OUT "ratio <- c($ratio);\n"; print OUT "dfree <- c($cnt);\n"; print OUT "dt(ratio,dfree);\n"; close OUT; $value = `Rscript /tmp/rscript.tmp`; chomp($value); `rm -f /tmp/rscript.tmp`; return (split/\s+/,$value)[1]; }

</source>

Radius of Gyration

<source lang="bash"> echo 1 0 |g_gyrate -f _traj_norot.xtc -s _md.tpr -o _radius-of-gyration.xvg echo 1 0 |g_gyrate -f _traj_norot.xtc -s _md.tpr -o _moments_of_inertia.xvg -moi </source>

Accessible Surface

<source lang="bash"> echo 1 1 |g_sas -f _traj_norot.xtc -s _md.tpr -o _solvent-accessible-surface.xvg -oa _atomic-sas.xvg -or _residue-sas.xvg </source>

Hydrogen Bonds

<source lang="bash"> echo 1 1 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-intra-protein.xvg echo 1 12 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-protein-water.xvg </source>

Secondary Structure

<source lang="bash"> wget http://swift.cmbi.ru.nl/gv/dssp/HTML/dsspcmbi chmod +x dsspcmbi export DSSP=$HOME/dsspcmbi echo 1|do_dssp -f _traj_norot.xtc -s _md.tpr -o _secondary-structure.xpm -sc _secondary-structure.xvg -dt 10 xpm2ps -f _secondary-structure.xpm -o _secondary-structure.eps </source>

Ramachandran Plot

<source lang="bash"> g_rama -f _traj_norot.xtc -s _md.tpr -o _ramachandran.xvg </source>

Somewhat nicer and modified from here. Extract the desired time steps in the traj_filtered.pdb with your favorite method, clean residues unknown to the script and use the script below with the pdb-file as parameter. Or use the whole trajectory-pdb, if you have the memory and want a nice black page. :-) Requires numpy, rpy (modules available in the ubuntu repositories) and MMTK (download). <source lang="bash"> python ramachandran.py <time-step> </source>

<source lang="python">

  1. !/usr/bin/env python
  2. usage: ramachandran.py <pdb-file>

rama_GENERAL = "General" rama_GLYCINE = "Glycine" rama_PROLINE = "Proline" rama_PRE_PRO = "Pre-Pro" ramachandran_types = [rama_GENERAL,rama_GLYCINE,rama_PROLINE,rama_PRE_PRO]

  1. I have used the same colours as RAMPAGE
  2. http://raven.bioc.cam.ac.uk/rampage.php

rama_settings = {"General" : ([0, 0.0005, 0.02, 1],

                             ['#FFFFFF','#B3E8FF','#7FD9FF'],
                             "pct/rama/rama500-general.data"),
                             # or rama500-general-nosec.data
                "Glycine" : ([0, 0.002,  0.02, 1],
                             ['#FFFFFF','#FFE8C5','#FFCC7F'],
                             "pct/rama/rama500-gly-sym.data"),
                             # or rama500-gly-sym-nosec.data
                "Proline" : ([0, 0.002,  0.02, 1],
                             ['#FFFFFF','#D0FFC5','#7FFF8C'],
                             "pct/rama/rama500-pro.data"),
                "Pre-Pro" : ([0, 0.002,  0.02, 1],
                             ['#FFFFFF','#B3E8FF','#7FD9FF'],
                             "pct/rama/rama500-prepro.data")}
                #P.S. Also rama500-ala-nosec.data

import numpy def load_data_file(filename) :

   STEP=2
   HALF_STEP=1
   STEP = HALF_STEP*2
   lower_bounds = range(-180, 180, STEP)
   mid_points = range(-180+HALF_STEP, 180+HALF_STEP, STEP)
   upper_bounds = range(-180+STEP, 180+STEP, STEP)
   data = numpy.array([[0.0 for x in mid_points] for y in mid_points],
                        dtype=numpy.float)
   """
   # Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30"
   # Number of dimensions: 2
   # For each dimension, 1 to 2: lower_bound  upper_bound  number_of_bins  wrapping
   #   x1: -180.0 180.0 180 true
   #   x2: -180.0 180.0 180 true
   # List of table coordinates and values. (Value is last number on each line.)
   -179.0 -179.0 0.0918642445114388
   -179.0 -177.0 0.07105717866463215
   ...
   """
   input_file = open(filename,"r")
   for line in input_file :
       #Strip the newline character(s) from the end of the line
       if line[-1]=="\n" : line = line[:-1]
       if line[-1]=="\r" : line = line[:-1]
       if line[0]=="#" :
           #comment
           pass
       else :
           #data
           parts = line.split()
           assert len(parts)==3
           
           x1 = float(parts[0]) #phi
           x2 = float(parts[1]) #psi
           value = float(parts[2])
           
           assert x1 == float(int(x1))
           assert x2 == float(int(x2))
           i1 = mid_points.index(int(x1))
           i2 = mid_points.index(int(x2))
           
           data[i1,i2]=value
   input_file.close()
   return (data, lower_bounds, mid_points, upper_bounds)
  1. filename = "stat/rama/rama500-general.data"
  2. data, lower_bounds, mid_points, upper_bounds = load_data_file(filename)
  3. print sum(sum(data))

from rpy import * r.library("MASS")

print "Creating R function", r(""" ramachandran.plot <- function(x.scatter, y.scatter,

   x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
   xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE),
   zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels),
   nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) -
       1), plot.title="", plot.axes, key.title, key.axes, asp = NA,
   xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes,
   ...)

{

   if (missing(z.grid)) {
       stop("no 'z.grid' matrix specified")
   }
   else if (is.list(x.grid)) {
       y.grid <- x.grid$y
       x.grid <- x.grid$x
   }
   if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
       stop("increasing 'x.grid' and 'y.grid' values expected")
   plot.new()
   plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)
   if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
       stop("no proper 'z.grid' matrix specified")
   if (!is.double(z.grid))
       storage.mode(z.grid) <- "double"
   .Internal(filledcontour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels),
       col = col))
   if (!(missing(x.scatter)) && !(missing(y.scatter))) {
       plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL),
               xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
               type="p", pch=20, cex=0.1)
   }
       
   if (missing(plot.axes)) {
       if (axes) {
           title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
           axis(1, at=c(-180,-90,0,90,180))
           axis(2, at=c(-180,-90,0,90,180))
       }
   }
   else plot.axes
   if (frame.plot)
       box()
   if (missing(plot.title))
       title(...)
   else plot.title
   invisible()

} """) print "Done"


import math def degrees(rad_angle) :

   """Converts and angle in radians to degrees, mapped to the range [-180,180]"""
   angle = rad_angle * 180 / math.pi
   #Note this assume the radians angle is positive as that's what MMTK does
   while angle > 180 :
       angle = angle - 360
   return angle

def next_residue(residue) :

   """Expects an MMTK residue, returns the next residue in the chain, or None"""
   #Proteins go N terminal --> C terminal
   #The next reside is bonded to the C of this atom...
   for a in residue.peptide.C.bondedTo():
       if a.parent.parent != residue:
           return a.parent.parent
   return None


def residue_amino(residue) :

   """Expects an MMTK residue, returns the three letter amino acid code in upper case"""
   if residue :
       return residue.name[0:3].upper()
   else :
       return None

def residue_ramachandran_type(residue) :

   """Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
   if residue_amino(residue)=="GLY" :
       return rama_GLYCINE
   elif residue_amino(residue)=="PRO" :
       return rama_PROLINE
   elif residue_amino(next_residue(residue))=="PRO" :
       #exlcudes those that are Pro or Gly
       return rama_PRE_PRO
   else :
       return rama_GENERAL

scatter_phi = dict() scatter_psi = dict() for ramachandran_type in ramachandran_types :

   scatter_phi[ramachandran_type]=[]
   scatter_psi[ramachandran_type]=[]
   

pdb=sys.argv[1] pdb_filename = "%s.pdb" % pdb

print "Loading PDB file: " + pdb_filename import MMTK.PDB import MMTK.Proteins

  1. protein = MMTK.Proteins.Protein("1HMP.pdb", model="no_hydrogens")
  2. Load the PDB file, ignore the hydrogrens, and then build a model of the peptides:

configuration = MMTK.PDB.PDBConfiguration(pdb_filename) configuration.deleteHydrogens() protein = MMTK.Proteins.Protein(configuration.createPeptideChains(model = "no_hydrogens")) for chain in protein :

   print chain.name
   for residue in chain :
       phi, psi = residue.phiPsi()
       #print residue.name, phi, psi
       if phi and psi :
           ramachandran_type = residue_ramachandran_type(residue)
           assert ramachandran_type in ramachandran_types
           scatter_phi[ramachandran_type].append(degrees(phi))
           scatter_psi[ramachandran_type].append(degrees(psi))
       assert len(scatter_phi) == len(scatter_psi)
   

print "Done"

  1. pdf_filename = "../%s.pdf" % pdb
  2. r.pdf(pdf_filename)

png_filename = "%s.png" % pdb r.png(png_filename)

  1. To get four plots on one page, you could use :
  2. r.split_screen([2,2]) #split into two by two screen
  3. Or:
  4. r.layout(Numeric.array([[1,2],[3,4]]), respect=True)
  5. But I went for simply:

r.par(mfrow=[2,2])

for (i,ramachandran_type) in enumerate(ramachandran_types) :

   #pdf_filename = "../%s_%s.pdf" % (pdb, ramachandran_type)
   (rama_levels, rama_colors, rama_filename) = rama_settings[ramachandran_type]
   
   print "Loading data file: " + rama_filename,
   data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)
   print "Done"
   #print "Creating PDF output file: " + pdf_filename,
   #r.pdf(pdf_filename)
   #r.plot(scatter_phi, scatter_psi)
   print "Generating quadrant %i, %s" % (i+1, ramachandran_type)
   #r.screen(i+1)
   #Use small margins to make the plots nice and big,
   #and specify a SQUARE plot area (to go with aspect ratio, asp=1)
   r.par(mar = [2, 2, 2, 2], pty="s")
   #This function will do a Ramachandran plot in the next quadrant
   #which we setup using par(mfrow-...)
   r.ramachandran_plot(x_scatter=scatter_phi[ramachandran_type],
                       y_scatter=scatter_psi[ramachandran_type], 
                       x_grid=mid_points, y_grid=mid_points, z_grid=data,
                       xlim=[-180,180], ylim=[-180,180], asp=1.0,
                       plot_title=ramachandran_type, drawlabels=False,
                       levels=rama_levels, col=rama_colors)
   print ramachandran_type + " Done"

r.dev_off() print "Done" </source>

more RMSDs

<source lang="bash"> echo -e "1\n1\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix.xpm -dt 10 echo -e "6\n6\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix_chain-c_b.xpm -dt 10 xpm2ps -f _rmsd-matrix.xpm -o _rmsd-matrix.eps -rainbow blue xpm2ps -f _rmsd-matrix_chain-c_b.xpm -o _rmsd-matrix_chain-c_b.eps -rainbow blue </source>

Cluster Analysis

<source lang="bash"> echo 6 6 | g_cluster -s _md.tpr -f _traj_norot.xtc -dm _rmsd-matrix_chain-c_b.xpm -dist _rmsd-distribution.xvg -o _clusters.xpm -sz _cluster-sizes.xvg -tr _cluster-transitions.xpm -ntr _cluster-transitions.xvg -clid _cluster-id-over-time.xvg -cl _clusters.pdb -cutoff 0.1 -method gromos -dt 10 </source>

Distance RMSD

<source lang="bash"> echo 1|g_rmsdist -s _md.tpr -f _traj_norot.xtc -o _distance-rmsd.xvg </source>