Difference between revisions of "Task 10 - Journal (PKU)"

Latest revision as of 11:58, 29 August 2012

Initial checks

Verify completion, frame number and frame rate: <source lang="bash"> gmxcheck -f _md.xtc </source>

performance statistics: <source lang="bash"> tail _md.log </source>

Create Trajectories

fix jumps over the periodic boundary

trjconv -f _md.xtc -s _solv.pdb -pbc nojump -o _traj_nojump.xtc<<EOF0\nEOF

center protein in the box

trjconv -f _traj_nojump.xtc -s _solv.pdb -center -o _traj_center.xtc<<EOF1\n0\nEOF

eliminate rotations

trjconv -f _traj_center.xtc -s _solv.pdb -fit rot+trans -o _traj_norot.xtc<<EOF1\n0\nEOF

create pdb file

trjconv -f _traj_norot.xtc -s _solv.pdb -o _traj.pdb<<EOF1\nEOF

reduce fluctuations and write every fifth frame

g_filter -f _traj.pdb -s _solv.pdb -ol _traj_filtered.pdb -fit -nf 5<<EOF1\nEOF

</source>

Plot Energies

<source lang="bash"> echo 12 0 | g_energy -f _md.edr -o _temperature.xvg echo 13 0 | g_energy -f _md.edr -o _pressure.xvg echo -e "9\n10\n11 0" | g_energy -f _md.edr -o _energy.xvg echo 18 0 | g_energy -f _md.edr -o _volume.xvg echo 19 0 | g_energy -f _md.edr -o _density.xvg echo -e "15\n16\n17 0" | g_energy -f _md.edr -o _box.xvg echo -e "48\n50 0" | g_energy -f _md.edr -o _coulomb-inter.xvg echo -e "49\n51 0" | g_energy -f _md.edr -o _vanderwaals-inter.xvg </source>

Minimal Distance of Interactions

<source lang="bash"> echo 1 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_distance.xvg -pi echo 3 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_c_alpha_distance.xvg -pi </source>

RMSD Calculations

<source lang="bash"> echo 1 1|g_rmsf -f _md.xtc -s _md.tpr -o _rmsf-per-residue.xvg -ox _average.pdb -oq _bfactors.pdb -res echo 1 1 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-all-atom-vs-start.xvg echo 4 4 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-backbone-vs-start.xvg echo 1 |trjconv -f _traj_norot.xtc -s _md.tpr -o _protein.xtc echo 1 1 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-all-atom-vs-average.xvg echo 4 4 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-backbone-vs-average.xvg </source>

Difference of RMSF

distributed by Marc: <source lang="perl">

!/usr/bin/perl -w

$swExit = 0;

if(@ARGV < 2){ print STDERR "statTD.pl <RMSF FILE1> <RMSF FILE2> [START] [END]\n"; exit; } else{ if(! -e "$ARGV[0]"){ print STDERR "File $ARGV[0] does not exist!\n"; $swExit = 1; } if(! -e "$ARGV[1]"){ print STDERR "File $ARGV[1] does not exist!\n"; $swExit = 1; } } if(defined $ARGV[2]){

if(not defined $ARGV[3]){ print STDERR "END value missing!\n"; $swExit = 1; } elsif($ARGV[2] !~ m/[0-9]+/ || $ARGV[3] !~ m/[0-9]+/){ print STDERR "Problem with the ranges please check!\n"; $swExit = 1; } } if($swExit){exit;} if(defined $ARGV[2] && defined $ARGV[3] && $ARGV[2] > $ARGV[3]){ $tmp = $ARGV[2]; $ARGV[2] = $ARGV[3]; $ARGV[3] = $tmp; }

open(FILE,$ARGV[0]); while(<FILE>){ chomp($_); if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){ #print "$1 $2\t"; if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]){ $in1{$1} = $2; #print "1\n"; } elsif(not defined $ARGV[2]){ $in1{$1} = $2; #print "$ARGV[2] $ARGV[3] 3\n"; }

} } close FILE; open(FILE,$ARGV[1]); while(<FILE>){ chomp($_); if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){ #print "$1 $2\n"; if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]) { $in2{$1} = $2; } elsif(not defined $ARGV[2]){ $in2{$1} = $2; } } } close FILE; if((keys %in1) != (keys %in2)){ print STDERR "Number of datapoints not identical!\n"; exit; }

$value = &getPV(\%in1,\%in2);

print "$value"; if($value <= 0.001){ print "**\n"; } elsif($value <= 0.05){ print "*\n"; } else{ print "-\n"; } sub getPV{ my (%data1) = %{$_[0]}; my (%data2) = %{$_[1]}; my $avg = 0; my $cnt = 0; my $std = 0; my $ste = 0; my $ratio = 0;

foreach $key (sort keys %data1){ #print "$key\n"; $avg += ($data1{$key} - $data2{$key}); $cnt++; } $avg /= $cnt; foreach $key (sort keys %data1){ $std += (($data1{$key} - $data2{$key}) - $avg)**2; } $std /= $cnt; $std = sqrt($std); $ste = $std/sqrt($cnt); $ratio = abs($avg/$ste); open(OUT,">/tmp/rscript.tmp"); print OUT "ratio <- c($ratio);\n"; print OUT "dfree <- c($cnt);\n"; print OUT "dt(ratio,dfree);\n"; close OUT; $value = `Rscript /tmp/rscript.tmp`; chomp($value); `rm -f /tmp/rscript.tmp`; return (split/\s+/,$value)[1]; }

</source>

Radius of Gyration

<source lang="bash"> echo 1 0 |g_gyrate -f _traj_norot.xtc -s _md.tpr -o _radius-of-gyration.xvg echo 1 0 |g_gyrate -f _traj_norot.xtc -s _md.tpr -o _moments_of_inertia.xvg -moi </source>

Accessible Surface

<source lang="bash"> echo 1 1 |g_sas -f _traj_norot.xtc -s _md.tpr -o _solvent-accessible-surface.xvg -oa _atomic-sas.xvg -or _residue-sas.xvg </source>

Hydrogen Bonds

<source lang="bash"> echo 1 1 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-intra-protein.xvg echo 1 12 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-protein-water.xvg </source>

Secondary Structure

<source lang="bash"> wget http://swift.cmbi.ru.nl/gv/dssp/HTML/dsspcmbi chmod +x dsspcmbi export DSSP=$HOME/dsspcmbi echo 1|do_dssp -f _traj_norot.xtc -s _md.tpr -o _secondary-structure.xpm -sc _secondary-structure.xvg -dt 10 xpm2ps -f _secondary-structure.xpm -o _secondary-structure.eps </source>

Ramachandran Plot

<source lang="bash"> g_rama -f _traj_norot.xtc -s _md.tpr -o _ramachandran.xvg </source>

Somewhat nicer and modified from here. Extract the desired time steps in the traj_filtered.pdb with your favorite method, clean residues unknown to the script and use the script below with the pdb-file as parameter. Or use the whole trajectory-pdb, if you have the memory and want a nice black page. :-) Requires numpy, rpy (modules available in the ubuntu repositories) and MMTK (download). <source lang="bash"> python ramachandran.py <time-step> </source>

!/usr/bin/env python
usage: ramachandran.py <pdb-file>

rama_GENERAL = "General" rama_GLYCINE = "Glycine" rama_PROLINE = "Proline" rama_PRE_PRO = "Pre-Pro" ramachandran_types = [rama_GENERAL,rama_GLYCINE,rama_PROLINE,rama_PRE_PRO]

I have used the same colours as RAMPAGE
http://raven.bioc.cam.ac.uk/rampage.php

rama_settings = {"General" : ([0, 0.0005, 0.02, 1],

                             ['#FFFFFF','#B3E8FF','#7FD9FF'],
                             "pct/rama/rama500-general.data"),
                             # or rama500-general-nosec.data
                "Glycine" : ([0, 0.002,  0.02, 1],
                             ['#FFFFFF','#FFE8C5','#FFCC7F'],
                             "pct/rama/rama500-gly-sym.data"),
                             # or rama500-gly-sym-nosec.data
                "Proline" : ([0, 0.002,  0.02, 1],
                             ['#FFFFFF','#D0FFC5','#7FFF8C'],
                             "pct/rama/rama500-pro.data"),
                "Pre-Pro" : ([0, 0.002,  0.02, 1],
                             ['#FFFFFF','#B3E8FF','#7FD9FF'],
                             "pct/rama/rama500-prepro.data")}
                #P.S. Also rama500-ala-nosec.data

import numpy def load_data_file(filename) :

   STEP=2
   HALF_STEP=1
   STEP = HALF_STEP*2
   lower_bounds = range(-180, 180, STEP)
   mid_points = range(-180+HALF_STEP, 180+HALF_STEP, STEP)
   upper_bounds = range(-180+STEP, 180+STEP, STEP)

   data = numpy.array([[0.0 for x in mid_points] for y in mid_points],
                        dtype=numpy.float)

   """
   # Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30"
   # Number of dimensions: 2
   # For each dimension, 1 to 2: lower_bound  upper_bound  number_of_bins  wrapping
   #   x1: -180.0 180.0 180 true
   #   x2: -180.0 180.0 180 true
   # List of table coordinates and values. (Value is last number on each line.)
   -179.0 -179.0 0.0918642445114388
   -179.0 -177.0 0.07105717866463215
   ...
   """
   input_file = open(filename,"r")
   for line in input_file :
       #Strip the newline character(s) from the end of the line
       if line[-1]=="\n" : line = line[:-1]
       if line[-1]=="\r" : line = line[:-1]
       if line[0]=="#" :
           #comment
           pass
       else :
           #data
           parts = line.split()
           assert len(parts)==3
           
           x1 = float(parts[0]) #phi
           x2 = float(parts[1]) #psi
           value = float(parts[2])
           
           assert x1 == float(int(x1))
           assert x2 == float(int(x2))
           i1 = mid_points.index(int(x1))
           i2 = mid_points.index(int(x2))
           
           data[i1,i2]=value
   input_file.close()
   return (data, lower_bounds, mid_points, upper_bounds)

filename = "stat/rama/rama500-general.data"
data, lower_bounds, mid_points, upper_bounds = load_data_file(filename)
print sum(sum(data))

from rpy import * r.library("MASS")

print "Creating R function", r(""" ramachandran.plot <- function(x.scatter, y.scatter,

   x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
   xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE),
   zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels),
   nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) -
       1), plot.title="", plot.axes, key.title, key.axes, asp = NA,
   xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes,
   ...)

{

   if (missing(z.grid)) {
       stop("no 'z.grid' matrix specified")
   }
   else if (is.list(x.grid)) {
       y.grid <- x.grid$y
       x.grid <- x.grid$x
   }
   if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
       stop("increasing 'x.grid' and 'y.grid' values expected")

   plot.new()
   plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)

   if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
       stop("no proper 'z.grid' matrix specified")
   if (!is.double(z.grid))
       storage.mode(z.grid) <- "double"
   .Internal(filledcontour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels),
       col = col))

   if (!(missing(x.scatter)) && !(missing(y.scatter))) {
       plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL),
               xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
               type="p", pch=20, cex=0.1)
   }
       
   if (missing(plot.axes)) {
       if (axes) {
           title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
           axis(1, at=c(-180,-90,0,90,180))
           axis(2, at=c(-180,-90,0,90,180))
       }
   }
   else plot.axes
   if (frame.plot)
       box()
   if (missing(plot.title))
       title(...)
   else plot.title
   invisible()

} """) print "Done"

import math def degrees(rad_angle) :

   """Converts and angle in radians to degrees, mapped to the range [-180,180]"""
   angle = rad_angle * 180 / math.pi
   #Note this assume the radians angle is positive as that's what MMTK does
   while angle > 180 :
       angle = angle - 360
   return angle

def next_residue(residue) :

   """Expects an MMTK residue, returns the next residue in the chain, or None"""
   #Proteins go N terminal --> C terminal
   #The next reside is bonded to the C of this atom...
   for a in residue.peptide.C.bondedTo():
       if a.parent.parent != residue:
           return a.parent.parent
   return None

def residue_amino(residue) :

   """Expects an MMTK residue, returns the three letter amino acid code in upper case"""
   if residue :
       return residue.name[0:3].upper()
   else :
       return None

def residue_ramachandran_type(residue) :

   """Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
   if residue_amino(residue)=="GLY" :
       return rama_GLYCINE
   elif residue_amino(residue)=="PRO" :
       return rama_PROLINE
   elif residue_amino(next_residue(residue))=="PRO" :
       #exlcudes those that are Pro or Gly
       return rama_PRE_PRO
   else :
       return rama_GENERAL

scatter_phi = dict() scatter_psi = dict() for ramachandran_type in ramachandran_types :

   scatter_phi[ramachandran_type]=[]
   scatter_psi[ramachandran_type]=[]

pdb=sys.argv[1] pdb_filename = "%s.pdb" % pdb

print "Loading PDB file: " + pdb_filename import MMTK.PDB import MMTK.Proteins

protein = MMTK.Proteins.Protein("1HMP.pdb", model="no_hydrogens")
Load the PDB file, ignore the hydrogrens, and then build a model of the peptides:

configuration = MMTK.PDB.PDBConfiguration(pdb_filename) configuration.deleteHydrogens() protein = MMTK.Proteins.Protein(configuration.createPeptideChains(model = "no_hydrogens")) for chain in protein :

   print chain.name
   for residue in chain :
       phi, psi = residue.phiPsi()
       #print residue.name, phi, psi
       if phi and psi :
           ramachandran_type = residue_ramachandran_type(residue)
           assert ramachandran_type in ramachandran_types
           scatter_phi[ramachandran_type].append(degrees(phi))
           scatter_psi[ramachandran_type].append(degrees(psi))
       assert len(scatter_phi) == len(scatter_psi)

print "Done"

pdf_filename = "../%s.pdf" % pdb
r.pdf(pdf_filename)

png_filename = "%s.png" % pdb r.png(png_filename)

To get four plots on one page, you could use :
r.split_screen([2,2]) #split into two by two screen
Or:
r.layout(Numeric.array([[1,2],[3,4]]), respect=True)
But I went for simply:

r.par(mfrow=[2,2])

for (i,ramachandran_type) in enumerate(ramachandran_types) :

   #pdf_filename = "../%s_%s.pdf" % (pdb, ramachandran_type)
   (rama_levels, rama_colors, rama_filename) = rama_settings[ramachandran_type]
   
   print "Loading data file: " + rama_filename,
   data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)
   print "Done"

   #print "Creating PDF output file: " + pdf_filename,
   #r.pdf(pdf_filename)
   #r.plot(scatter_phi, scatter_psi)

   print "Generating quadrant %i, %s" % (i+1, ramachandran_type)
   #r.screen(i+1)

   #Use small margins to make the plots nice and big,
   #and specify a SQUARE plot area (to go with aspect ratio, asp=1)
   r.par(mar = [2, 2, 2, 2], pty="s")

   #This function will do a Ramachandran plot in the next quadrant
   #which we setup using par(mfrow-...)
   r.ramachandran_plot(x_scatter=scatter_phi[ramachandran_type],
                       y_scatter=scatter_psi[ramachandran_type], 
                       x_grid=mid_points, y_grid=mid_points, z_grid=data,
                       xlim=[-180,180], ylim=[-180,180], asp=1.0,
                       plot_title=ramachandran_type, drawlabels=False,
                       levels=rama_levels, col=rama_colors)
   print ramachandran_type + " Done"

r.dev_off() print "Done" </source>

more RMSDs

<source lang="bash"> echo -e "1\n1\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix.xpm -dt 10 echo -e "6\n6\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix_chain-c_b.xpm -dt 10 xpm2ps -f _rmsd-matrix.xpm -o _rmsd-matrix.eps -rainbow blue xpm2ps -f _rmsd-matrix_chain-c_b.xpm -o _rmsd-matrix_chain-c_b.eps -rainbow blue </source>

Cluster Analysis

<source lang="bash"> echo 6 6 | g_cluster -s _md.tpr -f _traj_norot.xtc -dm _rmsd-matrix_chain-c_b.xpm -dist _rmsd-distribution.xvg -o _clusters.xpm -sz _cluster-sizes.xvg -tr _cluster-transitions.xpm -ntr _cluster-transitions.xvg -clid _cluster-id-over-time.xvg -cl _clusters.pdb -cutoff 0.1 -method gromos -dt 10 </source>

Distance RMSD

<source lang="bash"> echo 1|g_rmsdist -s _md.tpr -f _traj_norot.xtc -o _distance-rmsd.xvg </source>

Difference between revisions of "Task 10 - Journal (PKU)"

Latest revision as of 11:58, 29 August 2012

Contents

Initial checks

Create Trajectories

Plot Energies

Minimal Distance of Interactions

RMSD Calculations

Difference of RMSF

Radius of Gyration

Accessible Surface

Hydrogen Bonds

Secondary Structure

Ramachandran Plot

more RMSDs

Cluster Analysis

Distance RMSD

Navigation menu

Views

Personal tools

Bioinformatik navigation

MediaWiki navigation

Search

Tools

@@ Line 10: / Line 10: @@
 </source>
-==create trajectories==
+==Create Trajectories==
 <source lang="bash">
@@ Line 18: / Line 18: @@
 trjconv -f _traj_nojump.xtc -s _solv.pdb -center -o _traj_center.xtc<<EOF1\n0\nEOF
 #eliminate rotations
-trjconv -f _traj_center.xtc -s _solv.pdb -fit rot+trans -o _norot.xtc<<EOF1\n0\nEOF
+trjconv -f _traj_center.xtc -s _solv.pdb -fit rot+trans -o _traj_norot.xtc<<EOF1\n0\nEOF
 #create pdb file
-trjconv -f _norot.xtc -s _solv.pdb -o _traj.pdb<<EOF1\nEOF
+trjconv -f _traj_norot.xtc -s _solv.pdb -o _traj.pdb<<EOF1\nEOF
 #reduce fluctuations and write every fifth frame
-g_filter -f 1J8U_traj.pdb -s 1J8U_solv.pdb -ol _traj_filtered.pdb -fit -nf 5<<EOF1\nEOF
+g_filter -f _traj.pdb -s _solv.pdb -ol _traj_filtered.pdb -fit -nf 5<<EOF1\nEOF
 </source>
+== Plot Energies ==
+<source lang="bash">
+echo 12 0 | g_energy -f _md.edr -o _temperature.xvg
+echo 13 0 | g_energy -f _md.edr -o _pressure.xvg
+echo -e "9\n10\n11 0" | g_energy -f _md.edr -o _energy.xvg
+echo 18 0 | g_energy -f _md.edr -o _volume.xvg
+echo 19 0 | g_energy -f _md.edr -o _density.xvg
+echo -e "15\n16\n17 0" | g_energy -f _md.edr -o _box.xvg
+echo -e "48\n50 0" | g_energy -f _md.edr -o _coulomb-inter.xvg
+echo -e "49\n51 0" | g_energy -f _md.edr -o _vanderwaals-inter.xvg
+</source>
+== Minimal Distance of Interactions ==
+<source lang="bash">
+echo 1 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_distance.xvg -pi
+echo 3 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_c_alpha_distance.xvg -pi
+</source>
+==RMSD Calculations==
+<source lang="bash">
+echo 1 1|g_rmsf -f _md.xtc -s _md.tpr -o _rmsf-per-residue.xvg -ox _average.pdb -oq _bfactors.pdb -res
+echo 1 1 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-all-atom-vs-start.xvg
+echo 4 4 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-backbone-vs-start.xvg
+echo 1 |trjconv -f _traj_norot.xtc -s _md.tpr -o _protein.xtc
+echo 1 1 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-all-atom-vs-average.xvg
+echo 4 4 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-backbone-vs-average.xvg
+</source>
+== Difference of RMSF ==
+distributed by Marc:
+<source lang="perl">
+#!/usr/bin/perl -w
+$swExit = 0;
+if(@ARGV < 2){
+	print STDERR "statTD.pl <RMSF FILE1> <RMSF FILE2> [START] [END]\n";
+	exit;
+}
+else{
+	if(! -e "$ARGV[0]"){
+		print STDERR "File $ARGV[0] does not exist!\n";
+		$swExit = 1;
+	}
+	if(! -e "$ARGV[1]"){
+		print STDERR "File $ARGV[1] does not exist!\n";
+		$swExit = 1;
+	}
+}
+if(defined $ARGV[2]){
+	if(not defined $ARGV[3]){
+		print STDERR "END value missing!\n";
+		$swExit = 1;
+	}
+	elsif($ARGV[2] !~ m/[0-9]+/ || $ARGV[3] !~ m/[0-9]+/){
+		print STDERR "Problem with the ranges please check!\n";
+		$swExit = 1;
+	}
+}
+if($swExit){exit;}
+if(defined $ARGV[2] && defined $ARGV[3] && $ARGV[2] > $ARGV[3]){
+	$tmp = $ARGV[2];
+	$ARGV[2] = $ARGV[3];
+	$ARGV[3] = $tmp;
+}
+open(FILE,$ARGV[0]);
+while(<FILE>){
+	chomp($_);
+	if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){
+		#print "$1 $2\t";
+		if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]){
+			$in1{$1} = $2;
+			#print "1\n";
+		}
+		elsif(not defined $ARGV[2]){
+			$in1{$1} = $2;
+			#print "$ARGV[2] $ARGV[3] 3\n";
+		}
+	}
+}
+close FILE;
+open(FILE,$ARGV[1]);
+while(<FILE>){
+	chomp($_);
+	if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){
+		#print "$1 $2\n";
+		if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]) {
+			$in2{$1} = $2;
+		}
+		elsif(not defined $ARGV[2]){
+			$in2{$1} = $2;
+		}
+	}
+}
+close FILE;
+if((keys %in1) != (keys %in2)){
+	print STDERR "Number of datapoints not identical!\n";
+	exit;
+}
+$value = &getPV(\%in1,\%in2);
+print "$value";
+if($value <= 0.001){
+	print "**\n";
+}
+elsif($value <= 0.05){
+	print "*\n";
+}
+else{
+	print "-\n";
+}
+sub getPV{
+	my (%data1) = %{$_[0]};
+	my (%data2) = %{$_[1]};
+	my $avg = 0;
+	my $cnt = 0;
+	my $std = 0;
+	my $ste = 0;
+	my $ratio = 0;
+	foreach $key (sort keys %data1){
+		#print "$key\n";
+		$avg += ($data1{$key} - $data2{$key});
+		$cnt++;
+	}
+	$avg /= $cnt;
+	foreach $key (sort keys %data1){
+		$std += (($data1{$key} - $data2{$key}) - $avg)**2;
+	}
+	$std /= $cnt;
+	$std = sqrt($std);
+	$ste = $std/sqrt($cnt);
+	$ratio = abs($avg/$ste);
+	open(OUT,">/tmp/rscript.tmp");
+	print OUT "ratio <- c($ratio);\n";
+	print OUT "dfree <- c($cnt);\n";
+	print OUT "dt(ratio,dfree);\n";
+	close OUT;
+	$value = `Rscript /tmp/rscript.tmp`;
+	chomp($value);
+	`rm -f /tmp/rscript.tmp`;
+	return (split/\s+/,$value)[1];
+}
+</source>
+== Radius of Gyration ==
+<source lang="bash">
+echo 1 0 |g_gyrate -f  _traj_norot.xtc -s _md.tpr -o _radius-of-gyration.xvg
+echo 1 0 |g_gyrate -f  _traj_norot.xtc -s _md.tpr -o _moments_of_inertia.xvg -moi
+</source>
+== Accessible Surface ==
+<source lang="bash">
+echo 1 1 |g_sas -f _traj_norot.xtc -s _md.tpr -o _solvent-accessible-surface.xvg -oa _atomic-sas.xvg -or _residue-sas.xvg
+</source>
+== Hydrogen Bonds ==
+<source lang="bash">
+echo 1 1 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-intra-protein.xvg
+echo 1 12 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-protein-water.xvg
+</source>
+== Secondary Structure ==
+<source lang="bash">
+wget http://swift.cmbi.ru.nl/gv/dssp/HTML/dsspcmbi
+chmod +x dsspcmbi
+export DSSP=$HOME/dsspcmbi
+echo 1|do_dssp -f _traj_norot.xtc -s _md.tpr -o _secondary-structure.xpm -sc _secondary-structure.xvg -dt 10
+xpm2ps -f _secondary-structure.xpm -o _secondary-structure.eps
+</source>
+== Ramachandran Plot ==
+<source lang="bash">
+g_rama -f _traj_norot.xtc -s _md.tpr -o _ramachandran.xvg
+</source>
+Somewhat nicer and modified from [http://www2.warwick.ac.uk/fac/sci/moac/people/students/peter_cock/python/ramachandran/drawing/ here]. Extract the desired time steps in the traj_filtered.pdb with your favorite method, clean residues unknown to the script and use the script below with the pdb-file as parameter. Or use the whole trajectory-pdb, if you have the memory and want a nice black page. :-) Requires numpy, rpy (modules available in the ubuntu repositories) and MMTK ([https://sourcesup.renater.fr/projects/mmtk/ download]).
+<source lang="bash">
+python ramachandran.py <time-step>
+</source>
+<source lang="python">
+#!/usr/bin/env python
+#usage: ramachandran.py <pdb-file>
+rama_GENERAL = "General"
+rama_GLYCINE = "Glycine"
+rama_PROLINE = "Proline"
+rama_PRE_PRO = "Pre-Pro"
+ramachandran_types = [rama_GENERAL,rama_GLYCINE,rama_PROLINE,rama_PRE_PRO]
+# I have used the same colours as RAMPAGE
+# http://raven.bioc.cam.ac.uk/rampage.php
+rama_settings = {"General" : ([0, 0.0005, 0.02, 1],
+                              ['#FFFFFF','#B3E8FF','#7FD9FF'],
+                              "pct/rama/rama500-general.data"),
+                              # or rama500-general-nosec.data
+                 "Glycine" : ([0, 0.002,  0.02, 1],
+                              ['#FFFFFF','#FFE8C5','#FFCC7F'],
+                              "pct/rama/rama500-gly-sym.data"),
+                              # or rama500-gly-sym-nosec.data
+                 "Proline" : ([0, 0.002,  0.02, 1],
+                              ['#FFFFFF','#D0FFC5','#7FFF8C'],
+                              "pct/rama/rama500-pro.data"),
+                 "Pre-Pro" : ([0, 0.002,  0.02, 1],
+                              ['#FFFFFF','#B3E8FF','#7FD9FF'],
+                              "pct/rama/rama500-prepro.data")}
+                 #P.S. Also rama500-ala-nosec.data
+import numpy
+def load_data_file(filename) :
+    STEP=2
+    HALF_STEP=1
+    STEP = HALF_STEP*2
+    lower_bounds = range(-180, 180, STEP)
+    mid_points = range(-180+HALF_STEP, 180+HALF_STEP, STEP)
+    upper_bounds = range(-180+STEP, 180+STEP, STEP)
+    data = numpy.array([[0.0 for x in mid_points] for y in mid_points],
+                         dtype=numpy.float)
+    """
+    # Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30"
+    # Number of dimensions: 2
+    # For each dimension, 1 to 2: lower_bound  upper_bound  number_of_bins  wrapping
+    #   x1: -180.0 180.0 180 true
+    #   x2: -180.0 180.0 180 true
+    # List of table coordinates and values. (Value is last number on each line.)
+    -179.0 -179.0 0.0918642445114388
+    -179.0 -177.0 0.07105717866463215
+    ...
+    """
+    input_file = open(filename,"r")
+    for line in input_file :
+        #Strip the newline character(s) from the end of the line
+        if line[-1]=="\n" : line = line[:-1]
+        if line[-1]=="\r" : line = line[:-1]
+        if line[0]=="#" :
+            #comment
+            pass
+        else :
+            #data
+            parts = line.split()
+            assert len(parts)==3
+            x1 = float(parts[0]) #phi
+            x2 = float(parts[1]) #psi
+            value = float(parts[2])
+            assert x1 == float(int(x1))
+            assert x2 == float(int(x2))
+            i1 = mid_points.index(int(x1))
+            i2 = mid_points.index(int(x2))
+            data[i1,i2]=value
+    input_file.close()
+    return (data, lower_bounds, mid_points, upper_bounds)
+#filename = "stat/rama/rama500-general.data"
+#data, lower_bounds, mid_points, upper_bounds = load_data_file(filename)
+#print sum(sum(data))
+from rpy import *
+r.library("MASS")
+print "Creating R function",
+r("""
+ramachandran.plot <- function(x.scatter, y.scatter,
+    x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid,
+    xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE),
+    zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels),
+    nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) -
+), plot.title="", plot.axes, key.title, key.axes, asp = NA,
+    xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes,
+    ...)
+{
+    if (missing(z.grid)) {
+        stop("no 'z.grid' matrix specified")
+    }
+    else if (is.list(x.grid)) {
+        y.grid <- x.grid$y
+        x.grid <- x.grid$x
+    }
+    if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0))
+        stop("increasing 'x.grid' and 'y.grid' values expected")
+    plot.new()
+    plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)
+    if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1)
+        stop("no proper 'z.grid' matrix specified")
+    if (!is.double(z.grid))
+        storage.mode(z.grid) <- "double"
+    .Internal(filledcontour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels),
+        col = col))
+    if (!(missing(x.scatter)) && !(missing(y.scatter))) {
+        plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL),
+                xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp,
+                type="p", pch=20, cex=0.1)
+    }
+    if (missing(plot.axes)) {
+        if (axes) {
+            title(main=plot.title, xlab=expression(phi), ylab=expression(psi))
+            axis(1, at=c(-180,-90,0,90,180))
+            axis(2, at=c(-180,-90,0,90,180))
+        }
+    }
+    else plot.axes
+    if (frame.plot)
+        box()
+    if (missing(plot.title))
+        title(...)
+    else plot.title
+    invisible()
+}
+""")
+print "Done"
+import math
+def degrees(rad_angle) :
+    """Converts and angle in radians to degrees, mapped to the range [-180,180]"""
+    angle = rad_angle * 180 / math.pi
+    #Note this assume the radians angle is positive as that's what MMTK does
+    while angle > 180 :
+        angle = angle - 360
+    return angle
+def next_residue(residue) :
+    """Expects an MMTK residue, returns the next residue in the chain, or None"""
+    #Proteins go N terminal --> C terminal
+    #The next reside is bonded to the C of this atom...
+    for a in residue.peptide.C.bondedTo():
+        if a.parent.parent != residue:
+            return a.parent.parent
+    return None
+def residue_amino(residue) :
+    """Expects an MMTK residue, returns the three letter amino acid code in upper case"""
+    if residue :
+        return residue.name[0:3].upper()
+    else :
+        return None
+def residue_ramachandran_type(residue) :
+    """Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)"""
+    if residue_amino(residue)=="GLY" :
+        return rama_GLYCINE
+    elif residue_amino(residue)=="PRO" :
+        return rama_PROLINE
+    elif residue_amino(next_residue(residue))=="PRO" :
+        #exlcudes those that are Pro or Gly
+        return rama_PRE_PRO
+    else :
+        return rama_GENERAL
+scatter_phi = dict()
+scatter_psi = dict()
+for ramachandran_type in ramachandran_types :
+    scatter_phi[ramachandran_type]=[]
+    scatter_psi[ramachandran_type]=[]
+pdb=sys.argv[1]
+pdb_filename = "%s.pdb" % pdb
+print "Loading PDB file: " + pdb_filename
+import MMTK.PDB
+import MMTK.Proteins
+#protein = MMTK.Proteins.Protein("1HMP.pdb", model="no_hydrogens")
+# Load the PDB file, ignore the hydrogrens, and then build a model of the peptides:
+configuration = MMTK.PDB.PDBConfiguration(pdb_filename)
+configuration.deleteHydrogens()
+protein = MMTK.Proteins.Protein(configuration.createPeptideChains(model = "no_hydrogens"))
+for chain in protein :
+    print chain.name
+    for residue in chain :
+        phi, psi = residue.phiPsi()
+        #print residue.name, phi, psi
+        if phi and psi :
+            ramachandran_type = residue_ramachandran_type(residue)
+            assert ramachandran_type in ramachandran_types
+            scatter_phi[ramachandran_type].append(degrees(phi))
+            scatter_psi[ramachandran_type].append(degrees(psi))
+        assert len(scatter_phi) == len(scatter_psi)
+print "Done"
+#pdf_filename = "../%s.pdf" % pdb
+#r.pdf(pdf_filename)
+png_filename = "%s.png" % pdb
+r.png(png_filename)
+#To get four plots on one page, you could use :
+#
+#r.split_screen([2,2]) #split into two by two screen
+#
+#Or:
+#
+#r.layout(Numeric.array([[1,2],[3,4]]), respect=True)
+#
+#But I went for simply:
+r.par(mfrow=[2,2])
+for (i,ramachandran_type) in enumerate(ramachandran_types) :
+    #pdf_filename = "../%s_%s.pdf" % (pdb, ramachandran_type)
+    (rama_levels, rama_colors, rama_filename) = rama_settings[ramachandran_type]
+    print "Loading data file: " + rama_filename,
+    data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename)
+    print "Done"
+    #print "Creating PDF output file: " + pdf_filename,
+    #r.pdf(pdf_filename)
+    #r.plot(scatter_phi, scatter_psi)
+    print "Generating quadrant %i, %s" % (i+1, ramachandran_type)
+    #r.screen(i+1)
+    #Use small margins to make the plots nice and big,
+    #and specify a SQUARE plot area (to go with aspect ratio, asp=1)
+    r.par(mar = [2, 2, 2, 2], pty="s")
+    #This function will do a Ramachandran plot in the next quadrant
+    #which we setup using par(mfrow-...)
+    r.ramachandran_plot(x_scatter=scatter_phi[ramachandran_type],
+                        y_scatter=scatter_psi[ramachandran_type],
+                        x_grid=mid_points, y_grid=mid_points, z_grid=data,
+                        xlim=[-180,180], ylim=[-180,180], asp=1.0,
+                        plot_title=ramachandran_type, drawlabels=False,
+                        levels=rama_levels, col=rama_colors)
+    print ramachandran_type + " Done"
+r.dev_off()
+print "Done"
+</source>
+== more RMSDs ==
+<source lang="bash">
+echo -e "1\n1\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix.xpm -dt 10
+echo -e "6\n6\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix_chain-c_b.xpm -dt 10
+xpm2ps -f _rmsd-matrix.xpm -o _rmsd-matrix.eps -rainbow blue
+xpm2ps -f _rmsd-matrix_chain-c_b.xpm -o _rmsd-matrix_chain-c_b.eps -rainbow blue
+</source>
+== Cluster Analysis ==
+<source lang="bash">
+echo 6 6 | g_cluster -s _md.tpr -f _traj_norot.xtc -dm _rmsd-matrix_chain-c_b.xpm -dist _rmsd-distribution.xvg -o _clusters.xpm -sz _cluster-sizes.xvg -tr _cluster-transitions.xpm -ntr _cluster-transitions.xvg -clid _cluster-id-over-time.xvg -cl _clusters.pdb -cutoff 0.1 -method gromos -dt 10
+</source>
+== Distance RMSD ==
+<source lang="bash">
+echo 1|g_rmsdist -s _md.tpr -f _traj_norot.xtc -o _distance-rmsd.xvg
+</source>
+[[Category: Phenylketonuria 2012]]