Difference between revisions of "Task 10 - Journal (PKU)"
(→Ramachandran Plot) |
(→Ramachandran Plot) |
||
Line 210: | Line 210: | ||
</source> |
</source> |
||
− | Somewhat nicer and modified from [http://www2.warwick.ac.uk/fac/sci/moac/people/students/peter_cock/python/ramachandran/drawing/ here]. Extract the desired time steps in the traj_filtered.pdb with your favorite method, clean residues unknown to the script and use the script below with the pdb-file as parameter. Or use the whole trajectory-pdb, if you have the memory and want a nice black page. :-) Requires numpy, rpy (modules available in the ubuntu repositories) and MMTK ([https://sourcesup.renater.fr/projects/mmtk/ download]. |
+ | Somewhat nicer and modified from [http://www2.warwick.ac.uk/fac/sci/moac/people/students/peter_cock/python/ramachandran/drawing/ here]. Extract the desired time steps in the traj_filtered.pdb with your favorite method, clean residues unknown to the script and use the script below with the pdb-file as parameter. Or use the whole trajectory-pdb, if you have the memory and want a nice black page. :-) Requires numpy, rpy (modules available in the ubuntu repositories) and MMTK ([https://sourcesup.renater.fr/projects/mmtk/ download]). |
<source lang="bash"> |
<source lang="bash"> |
||
python ramachandran.py <time-step> |
python ramachandran.py <time-step> |
Revision as of 17:37, 15 August 2012
Contents
Initial checks
Verify completion, frame number and frame rate: <source lang="bash"> gmxcheck -f _md.xtc </source>
performance statistics: <source lang="bash"> tail _md.log </source>
Create Trajectories
<source lang="bash">
- fix jumps over the periodic boundary
trjconv -f _md.xtc -s _solv.pdb -pbc nojump -o _traj_nojump.xtc<<EOF0\nEOF
- center protein in the box
trjconv -f _traj_nojump.xtc -s _solv.pdb -center -o _traj_center.xtc<<EOF1\n0\nEOF
- eliminate rotations
trjconv -f _traj_center.xtc -s _solv.pdb -fit rot+trans -o _traj_norot.xtc<<EOF1\n0\nEOF
- create pdb file
trjconv -f _traj_norot.xtc -s _solv.pdb -o _traj.pdb<<EOF1\nEOF
- reduce fluctuations and write every fifth frame
g_filter -f _traj.pdb -s _solv.pdb -ol _traj_filtered.pdb -fit -nf 5<<EOF1\nEOF
</source>
Plot Energies
<source lang="bash"> echo 12 0 | g_energy -f _md.edr -o _temperature.xvg echo 13 0 | g_energy -f _md.edr -o _pressure.xvg echo -e "9\n10\n11 0" | g_energy -f _md.edr -o _energy.xvg echo 18 0 | g_energy -f _md.edr -o _volume.xvg echo 19 0 | g_energy -f _md.edr -o _density.xvg echo -e "15\n16\n17 0" | g_energy -f _md.edr -o _box.xvg echo -e "48\n50 0" | g_energy -f _md.edr -o _coulomb-inter.xvg echo -e "49\n51 0" | g_energy -f _md.edr -o _vanderwaals-inter.xvg </source>
Minimal Distance of Interactions
<source lang="bash"> echo 1 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_distance.xvg -pi echo 3 0 |g_mindist -f _md.xtc -s _md.tpr -od _minimal_periodic_c_alpha_distance.xvg -pi </source>
RMSD Calculations
<source lang="bash"> echo 1 1|g_rmsf -f _md.xtc -s _md.tpr -o _rmsf-per-residue.xvg -ox _average.pdb -oq _bfactors.pdb -res echo 1 1 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-all-atom-vs-start.xvg echo 4 4 |g_rms -f _traj_norot.xtc -s _md.tpr -o _rmsd-backbone-vs-start.xvg echo 1 |trjconv -f _traj_norot.xtc -s _md.tpr -o _protein.xtc echo 1 1 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-all-atom-vs-average.xvg echo 4 4 |g_rms -f _protein.xtc -s _average.pdb -o _rmsd-backbone-vs-average.xvg </source>
Difference of RMSF
distributed by Marc: <source lang="perl">
- !/usr/bin/perl -w
$swExit = 0;
if(@ARGV < 2){ print STDERR "statTD.pl <RMSF FILE1> <RMSF FILE2> [START] [END]\n"; exit; } else{ if(! -e "$ARGV[0]"){ print STDERR "File $ARGV[0] does not exist!\n"; $swExit = 1; } if(! -e "$ARGV[1]"){ print STDERR "File $ARGV[1] does not exist!\n"; $swExit = 1; } } if(defined $ARGV[2]){
if(not defined $ARGV[3]){ print STDERR "END value missing!\n"; $swExit = 1; } elsif($ARGV[2] !~ m/[0-9]+/ || $ARGV[3] !~ m/[0-9]+/){ print STDERR "Problem with the ranges please check!\n"; $swExit = 1; } } if($swExit){exit;} if(defined $ARGV[2] && defined $ARGV[3] && $ARGV[2] > $ARGV[3]){ $tmp = $ARGV[2]; $ARGV[2] = $ARGV[3]; $ARGV[3] = $tmp; }
open(FILE,$ARGV[0]);
while(<FILE>){
chomp($_);
if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){
#print "$1 $2\t";
if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]){
$in1{$1} = $2;
#print "1\n";
}
elsif(not defined $ARGV[2]){
$in1{$1} = $2;
#print "$ARGV[2] $ARGV[3] 3\n";
}
}
}
close FILE;
open(FILE,$ARGV[1]);
while(<FILE>){
chomp($_);
if(/^\s*([0-9]+)\s+([0-9]+\.[0-9]+)/){
#print "$1 $2\n";
if(defined $ARGV[2] && defined $ARGV[3] && $1 >= $ARGV[2] && $1 <= $ARGV[3]) {
$in2{$1} = $2;
}
elsif(not defined $ARGV[2]){
$in2{$1} = $2;
}
}
}
close FILE;
if((keys %in1) != (keys %in2)){
print STDERR "Number of datapoints not identical!\n";
exit;
}
$value = &getPV(\%in1,\%in2);
print "$value"; if($value <= 0.001){ print "**\n"; } elsif($value <= 0.05){ print "*\n"; } else{ print "-\n"; } sub getPV{ my (%data1) = %{$_[0]}; my (%data2) = %{$_[1]}; my $avg = 0; my $cnt = 0; my $std = 0; my $ste = 0; my $ratio = 0;
foreach $key (sort keys %data1){ #print "$key\n"; $avg += ($data1{$key} - $data2{$key}); $cnt++; } $avg /= $cnt; foreach $key (sort keys %data1){ $std += (($data1{$key} - $data2{$key}) - $avg)**2; } $std /= $cnt; $std = sqrt($std); $ste = $std/sqrt($cnt); $ratio = abs($avg/$ste); open(OUT,">/tmp/rscript.tmp"); print OUT "ratio <- c($ratio);\n"; print OUT "dfree <- c($cnt);\n"; print OUT "dt(ratio,dfree);\n"; close OUT; $value = `Rscript /tmp/rscript.tmp`; chomp($value); `rm -f /tmp/rscript.tmp`; return (split/\s+/,$value)[1]; }
</source>
Radius of Gyration
<source lang="bash"> echo 1 0 |g_gyrate -f _traj_norot.xtc -s _md.tpr -o _radius-of-gyration.xvg echo 1 0 |g_gyrate -f _traj_norot.xtc -s _md.tpr -o _moments_of_inertia.xvg -moi </source>
Accessible Surface
<source lang="bash"> echo 1 1 |g_sas -f _traj_norot.xtc -s _md.tpr -o _solvent-accessible-surface.xvg -oa _atomic-sas.xvg -or _residue-sas.xvg </source>
Hydrogen Bonds
<source lang="bash"> echo 1 1 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-intra-protein.xvg echo 1 12 | g_hbond -f _traj_norot.xtc -s _md.tpr -num _hydrogen-bonds-protein-water.xvg </source>
Secondary Structure
<source lang="bash"> wget http://swift.cmbi.ru.nl/gv/dssp/HTML/dsspcmbi chmod +x dsspcmbi export DSSP=$HOME/dsspcmbi echo 1|do_dssp -f _traj_norot.xtc -s _md.tpr -o _secondary-structure.xpm -sc _secondary-structure.xvg -dt 10 xpm2ps -f _secondary-structure.xpm -o _secondary-structure.eps </source>
Ramachandran Plot
<source lang="bash"> g_rama -f _traj_norot.xtc -s _md.tpr -o _ramachandran.xvg </source>
Somewhat nicer and modified from here. Extract the desired time steps in the traj_filtered.pdb with your favorite method, clean residues unknown to the script and use the script below with the pdb-file as parameter. Or use the whole trajectory-pdb, if you have the memory and want a nice black page. :-) Requires numpy, rpy (modules available in the ubuntu repositories) and MMTK (download). <source lang="bash"> python ramachandran.py <time-step> </source>
<source lang="python">
- !/usr/bin/env python
- usage: ramachandran.py <pdb-file>
rama_GENERAL = "General" rama_GLYCINE = "Glycine" rama_PROLINE = "Proline" rama_PRE_PRO = "Pre-Pro" ramachandran_types = [rama_GENERAL,rama_GLYCINE,rama_PROLINE,rama_PRE_PRO]
- I have used the same colours as RAMPAGE
- http://raven.bioc.cam.ac.uk/rampage.php
rama_settings = {"General" : ([0, 0.0005, 0.02, 1],
['#FFFFFF','#B3E8FF','#7FD9FF'], "pct/rama/rama500-general.data"), # or rama500-general-nosec.data "Glycine" : ([0, 0.002, 0.02, 1], ['#FFFFFF','#FFE8C5','#FFCC7F'], "pct/rama/rama500-gly-sym.data"), # or rama500-gly-sym-nosec.data "Proline" : ([0, 0.002, 0.02, 1], ['#FFFFFF','#D0FFC5','#7FFF8C'], "pct/rama/rama500-pro.data"), "Pre-Pro" : ([0, 0.002, 0.02, 1], ['#FFFFFF','#B3E8FF','#7FD9FF'], "pct/rama/rama500-prepro.data")} #P.S. Also rama500-ala-nosec.data
import numpy def load_data_file(filename) :
STEP=2 HALF_STEP=1 STEP = HALF_STEP*2 lower_bounds = range(-180, 180, STEP) mid_points = range(-180+HALF_STEP, 180+HALF_STEP, STEP) upper_bounds = range(-180+STEP, 180+STEP, STEP)
data = numpy.array([[0.0 for x in mid_points] for y in mid_points], dtype=numpy.float)
""" # Table name/description: "Top500 General case (not Gly, Pro, or pre-Pro) B<30" # Number of dimensions: 2 # For each dimension, 1 to 2: lower_bound upper_bound number_of_bins wrapping # x1: -180.0 180.0 180 true # x2: -180.0 180.0 180 true # List of table coordinates and values. (Value is last number on each line.) -179.0 -179.0 0.0918642445114388 -179.0 -177.0 0.07105717866463215 ... """ input_file = open(filename,"r") for line in input_file : #Strip the newline character(s) from the end of the line if line[-1]=="\n" : line = line[:-1] if line[-1]=="\r" : line = line[:-1] if line[0]=="#" : #comment pass else : #data parts = line.split() assert len(parts)==3 x1 = float(parts[0]) #phi x2 = float(parts[1]) #psi value = float(parts[2]) assert x1 == float(int(x1)) assert x2 == float(int(x2)) i1 = mid_points.index(int(x1)) i2 = mid_points.index(int(x2)) data[i1,i2]=value input_file.close() return (data, lower_bounds, mid_points, upper_bounds)
- filename = "stat/rama/rama500-general.data"
- data, lower_bounds, mid_points, upper_bounds = load_data_file(filename)
- print sum(sum(data))
from rpy import * r.library("MASS")
print "Creating R function", r(""" ramachandran.plot <- function(x.scatter, y.scatter,
x.grid = seq(0, 1, len = nrow(z)), y.grid = seq(0, 1, len = ncol(z)), z.grid, xlim = range(x.grid, finite = TRUE), ylim = range(y.grid, finite = TRUE), zlim = range(z.grid, finite = TRUE), levels = pretty(zlim, nlevels), nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) - 1), plot.title="", plot.axes, key.title, key.axes, asp = NA, xaxs = "i", yaxs = "i", las = 1, axes = TRUE, frame.plot = axes, ...)
{
if (missing(z.grid)) { stop("no 'z.grid' matrix specified") } else if (is.list(x.grid)) { y.grid <- x.grid$y x.grid <- x.grid$x } if (any(diff(x.grid) <= 0) || any(diff(y.grid) <= 0)) stop("increasing 'x.grid' and 'y.grid' values expected")
plot.new() plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp)
if (!is.matrix(z.grid) || nrow(z.grid) <= 1 || ncol(z.grid) <= 1) stop("no proper 'z.grid' matrix specified") if (!is.double(z.grid)) storage.mode(z.grid) <- "double" .Internal(filledcontour(as.double(x.grid), as.double(y.grid), z.grid, as.double(levels), col = col))
if (!(missing(x.scatter)) && !(missing(y.scatter))) { plot.xy(xy.coords(x.scatter,y.scatter,NULL,NULL,NULL,NULL), xlim=xlim, ylim=ylim, xlab="", ylab="", asp=asp, type="p", pch=20, cex=0.1) } if (missing(plot.axes)) { if (axes) { title(main=plot.title, xlab=expression(phi), ylab=expression(psi)) axis(1, at=c(-180,-90,0,90,180)) axis(2, at=c(-180,-90,0,90,180)) } } else plot.axes if (frame.plot) box() if (missing(plot.title)) title(...) else plot.title invisible()
} """) print "Done"
import math
def degrees(rad_angle) :
"""Converts and angle in radians to degrees, mapped to the range [-180,180]""" angle = rad_angle * 180 / math.pi #Note this assume the radians angle is positive as that's what MMTK does while angle > 180 : angle = angle - 360 return angle
def next_residue(residue) :
"""Expects an MMTK residue, returns the next residue in the chain, or None""" #Proteins go N terminal --> C terminal #The next reside is bonded to the C of this atom... for a in residue.peptide.C.bondedTo(): if a.parent.parent != residue: return a.parent.parent return None
def residue_amino(residue) :
"""Expects an MMTK residue, returns the three letter amino acid code in upper case""" if residue : return residue.name[0:3].upper() else : return None
def residue_ramachandran_type(residue) :
"""Expects an MMTK residue, returns ramachandran 'type' (General, Glycine, Proline or Pre-Pro)""" if residue_amino(residue)=="GLY" : return rama_GLYCINE elif residue_amino(residue)=="PRO" : return rama_PROLINE elif residue_amino(next_residue(residue))=="PRO" : #exlcudes those that are Pro or Gly return rama_PRE_PRO else : return rama_GENERAL
scatter_phi = dict() scatter_psi = dict() for ramachandran_type in ramachandran_types :
scatter_phi[ramachandran_type]=[] scatter_psi[ramachandran_type]=[]
pdb=sys.argv[1] pdb_filename = "%s.pdb" % pdb
print "Loading PDB file: " + pdb_filename import MMTK.PDB import MMTK.Proteins
- protein = MMTK.Proteins.Protein("1HMP.pdb", model="no_hydrogens")
- Load the PDB file, ignore the hydrogrens, and then build a model of the peptides:
configuration = MMTK.PDB.PDBConfiguration(pdb_filename) configuration.deleteHydrogens() protein = MMTK.Proteins.Protein(configuration.createPeptideChains(model = "no_hydrogens")) for chain in protein :
print chain.name for residue in chain : phi, psi = residue.phiPsi() #print residue.name, phi, psi if phi and psi : ramachandran_type = residue_ramachandran_type(residue) assert ramachandran_type in ramachandran_types scatter_phi[ramachandran_type].append(degrees(phi)) scatter_psi[ramachandran_type].append(degrees(psi)) assert len(scatter_phi) == len(scatter_psi)
print "Done"
- pdf_filename = "../%s.pdf" % pdb
- r.pdf(pdf_filename)
png_filename = "%s.png" % pdb r.png(png_filename)
- To get four plots on one page, you could use :
- r.split_screen([2,2]) #split into two by two screen
- Or:
- r.layout(Numeric.array([[1,2],[3,4]]), respect=True)
- But I went for simply:
r.par(mfrow=[2,2])
for (i,ramachandran_type) in enumerate(ramachandran_types) :
#pdf_filename = "../%s_%s.pdf" % (pdb, ramachandran_type) (rama_levels, rama_colors, rama_filename) = rama_settings[ramachandran_type] print "Loading data file: " + rama_filename, data, lower_bounds, mid_points, upper_bounds = load_data_file(rama_filename) print "Done"
#print "Creating PDF output file: " + pdf_filename, #r.pdf(pdf_filename) #r.plot(scatter_phi, scatter_psi)
print "Generating quadrant %i, %s" % (i+1, ramachandran_type) #r.screen(i+1)
#Use small margins to make the plots nice and big, #and specify a SQUARE plot area (to go with aspect ratio, asp=1) r.par(mar = [2, 2, 2, 2], pty="s")
#This function will do a Ramachandran plot in the next quadrant #which we setup using par(mfrow-...) r.ramachandran_plot(x_scatter=scatter_phi[ramachandran_type], y_scatter=scatter_psi[ramachandran_type], x_grid=mid_points, y_grid=mid_points, z_grid=data, xlim=[-180,180], ylim=[-180,180], asp=1.0, plot_title=ramachandran_type, drawlabels=False, levels=rama_levels, col=rama_colors) print ramachandran_type + " Done"
r.dev_off() print "Done" </source>
more RMSDs
<source lang="bash"> echo -e "1\n1\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix.xpm -dt 10 echo -e "6\n6\n\n"|g_rms -s _md.tpr -f _traj_norot.xtc -f2 _traj_norot.xtc -m _rmsd-matrix_chain-c_b.xpm -dt 10 xpm2ps -f _rmsd-matrix.xpm -o _rmsd-matrix.eps -rainbow blue xpm2ps -f _rmsd-matrix_chain-c_b.xpm -o _rmsd-matrix_chain-c_b.eps -rainbow blue </source>
Cluster Analysis
<source lang="bash"> echo 6 6 | g_cluster -s _md.tpr -f _traj_norot.xtc -dm _rmsd-matrix_chain-c_b.xpm -dist _rmsd-distribution.xvg -o _clusters.xpm -sz _cluster-sizes.xvg -tr _cluster-transitions.xpm -ntr _cluster-transitions.xvg -clid _cluster-id-over-time.xvg -cl _clusters.pdb -cutoff 0.1 -method gromos -dt 10 </source>
Distance RMSD
<source lang="bash"> echo 1|g_rmsdist -s _md.tpr -f _traj_norot.xtc -o _distance-rmsd.xvg </source>