Difference between revisions of "PKU journal Task5"

From Bioinformatikpedia
(Created page with "== Plot of the mapping == We used the java library Graphics2D here and apart from some parsing, and preprocessing of the data, the plot is pretty much the following class. For th…")
 
(Plot of the mapping)
 
(4 intermediate revisions by 2 users not shown)
Line 1: Line 1:
  +
[[Phenylketonuria]]»[[Researching_SNPs_(PKU)|Researching and mapping point mutations]]»journal
  +
  +
== Preparing the Data==
  +
===HGMD===
  +
<source lang="python">
  +
#/usr/bin/env python
  +
#translate hgmd mutations into standard coding (xxxW>M)
  +
#input copy-and-pasted from search results after PAH on www.hgmd.org. No download/non-html file found...
  +
  +
import sys
  +
  +
snpsfile = sys.argv[1]
  +
snps = {}
  +
  +
for line in open(snpsfile):
  +
codons, position = line.split()[1:3]
  +
offset = 0
  +
base1, base2 = None, None
  +
codons.replace("a","").replace("t","").replace("c","").replace("g","")
  +
for i in xrange(3):
  +
offset+=1
  +
if codons[i]!=codons[i+4]:
  +
base1 = codons[i]
  +
base2 = codons[i+4]
  +
break
  +
code = str((int(position)-1)*3+offset)+base1+">"+base2
  +
print code
  +
snps[code] = True
  +
</source>
  +
  +
===SNPdbe===
  +
<source lang="python">
  +
#!/usr/bin/env python
  +
#snpsfile is the text-file available from the search on http://www.rostlab.org/services/snpdbe/
  +
import sys
  +
  +
snpsfile = sys.argv[1]
  +
outfile = "SNPdbe/SNPs_effect"
  +
out = open(outfile,'w')
  +
  +
for line in open(snpsfile):
  +
fields = line.split('|')
  +
effect = "effect"
  +
if fields[20].strip().startswith("Pssm wt"):
  +
effect = "Effect_estimate"
  +
elif int(fields[20])<=12:
  +
effect = "no_effect"
  +
out.write(fields[0].strip()+"\t"+effect+"\n")
  +
out.close()
  +
</source>
  +
  +
===Text-Map===
  +
The following python script produces the text-only version of our map from source files compiled from three databases.
  +
  +
<source lang="python">
  +
#/usr/bin/env python
  +
from collections import defaultdict
  +
  +
dna_mutations = defaultdict(dict)
  +
aa_mutations = defaultdict(dict)
  +
  +
disease = "*"
  +
no_disease = "+"
  +
silent = "#"
  +
comment_string = "# *after SNP: disease causing (HGMD), + after SNP: no effect (missense, but not in HGMD),\
  +
# after SNP: silent mutation, single *: conserved, probably disease effect (SNPdbe), single +: little conserved, probably no effect (SNPdbe)\n#rows tab separated\nAA\tSNPs\tEffect from SNPdbe"
  +
  +
  +
hgmd = "HGMD/hgmd_snps"
  +
dbSNP_silent = "dbSNP/dbSNP_synonymous"
  +
dbSNP_missense = "dbSNP/dbSNP_missense"
  +
SNPdbe = "SNPdbe/SNPs_effect"
  +
  +
def read_input(dna_mutations, aa_mutations):
  +
for line in open(hgmd):
  +
position, mutation = line[:-4],line[-4:].strip()
  +
position = int(position)
  +
if dna_mutations.has_key(position):
  +
dna_mutations[position]["hgmd"].append(mutation)
  +
else:
  +
dna_mutations[position]["hgmd"] = [mutation]
  +
  +
  +
for line in open(dbSNP_silent):
  +
position, mutation = line[:-4],line[-4:].strip()
  +
position = int(position)
  +
if dna_mutations[position].has_key("dbSNP_silent"):
  +
dna_mutations[position]["dbSNP_silent"].append(mutation)
  +
else:
  +
dna_mutations[position]["dbSNP_silent"] = [mutation]
  +
  +
  +
for line in open(dbSNP_missense):
  +
position, mutation = line[:-4],line[-4:].strip()
  +
position = int(position)
  +
if dna_mutations[position].has_key("dbSNP_missense"):
  +
dna_mutations[position]["dbSNP_missense"].append(mutation)
  +
else:
  +
dna_mutations[position]["dbSNP_missense"] = [mutation]
  +
  +
for line in open(SNPdbe):
  +
mut, effect = line.split()[0],line.split()[1].strip()
  +
position = int(mut[1:-1])
  +
mutation = "%s->%s"%(mut[0],mut[-1])
  +
if aa_mutations[position].has_key(effect):
  +
aa_mutations[position][effect].append(mutation)
  +
else:
  +
aa_mutations[position][effect] = [mutation]
  +
  +
return dna_mutations, aa_mutations
  +
  +
def output():
  +
out = ""
  +
for i in xrange(1,454):
  +
aa = "%s\t"%i
  +
for pos in range((i-1)*3+1, (i-1)*3+4):
  +
m = ""
  +
if dna_mutations.has_key(pos):
  +
if dna_mutations[pos].has_key("hgmd"):
  +
for mutation in dna_mutations[pos]["hgmd"]:
  +
m += "%s%s%s "%(pos,mutation,disease)#* for disease causing
  +
if dna_mutations[pos].has_key("dbSNP_missense"):
  +
for mutation in dna_mutations[pos]["dbSNP_missense"]:
  +
if not "%s%s"%(pos,mutation) in m:
  +
m += "%s%s%s "%(pos,mutation, no_disease)#+for /not/ disease causing
  +
if dna_mutations[pos].has_key("dbSNP_silent"):
  +
for mutation in dna_mutations[pos]["dbSNP_silent"]:
  +
if not "%s%s"%(pos,mutation) in m:
  +
m += "%s%s%s "%(pos,mutation, silent)## for silent
  +
if m:
  +
aa += m
  +
if aa[-1]==" ":
  +
aa = aa[:-1]
  +
aa += "\t"
  +
if aa_mutations.has_key(i):
  +
for k in aa_mutations[i].keys():
  +
if k == "effect":
  +
aa += disease
  +
else:
  +
aa += no_disease
  +
out += aa.strip()+"\n"
  +
return out
  +
  +
dna_mutations, aa_mutations = read_input(dna_mutations, aa_mutations)
  +
print comment_string
  +
print output(),
  +
</source>
  +
 
== Plot of the mapping ==
 
== Plot of the mapping ==
 
We used the java library Graphics2D here and apart from some parsing, and preprocessing of the data, the plot is pretty much the following class. For the complete sourcecode please write to [mailto:sebastian.hollizeck@campus.lmu.de Sebastian].
 
We used the java library Graphics2D here and apart from some parsing, and preprocessing of the data, the plot is pretty much the following class. For the complete sourcecode please write to [mailto:sebastian.hollizeck@campus.lmu.de Sebastian].
   
 
<source lang="java">
 
<source lang="java">
package SNPMapping;
 
   
  +
import java.awt.BasicStroke;
 
import java.awt.Color;
 
import java.awt.Color;
 
import java.awt.Font;
 
import java.awt.Font;
 
import java.awt.Graphics;
 
import java.awt.Graphics;
 
import java.awt.Graphics2D;
 
import java.awt.Graphics2D;
  +
import java.awt.Stroke;
 
import java.awt.geom.RoundRectangle2D;
 
import java.awt.geom.RoundRectangle2D;
 
import java.awt.image.BufferedImage;
 
import java.awt.image.BufferedImage;
Line 24: Line 173:
 
public class MapPainter {
 
public class MapPainter {
   
public static void paint(LinkedList<Residue> input,String output) throws IOException {
+
public static void paint(LinkedList<Residue> input, String output) throws IOException {
 
int multi = 10;
 
int multi = 10;
 
//get maximal residue position
 
//get maximal residue position
Line 39: Line 188:
 
g2.setColor(Color.black);
 
g2.setColor(Color.black);
 
g2.draw(sequence);
 
g2.draw(sequence);
  +
 
 
Font original = g2.getFont();
 
Font original = g2.getFont();
  +
Color green = new Color(0, 100, 0);
  +
Color red = Color.RED;
  +
Color orange = new Color(255, 165, 0);
 
//print caption
 
//print caption
Font caption = new Font("SanSerif",Font.BOLD,24);
+
Font caption = new Font("SanSerif", Font.BOLD, 24);
 
g2.setFont(caption);
 
g2.setFont(caption);
g2.drawString("SNP mapping for PAH",300,50);
+
g2.drawString("SNP mapping for PAH", 300, 50);
   
 
//revert font
 
//revert font
Line 57: Line 209:
 
g2.drawString(pos + "", 40, paintpos);
 
g2.drawString(pos + "", 40, paintpos);
 
}
 
}
if (muts == null&&effect==null) {
+
if (muts == null && effect == null) {
 
//muts is only null if nothing but the position is in the file;
 
//muts is only null if nothing but the position is in the file;
 
continue;
 
continue;
Line 67: Line 219:
 
for (String s : muts) {
 
for (String s : muts) {
 
if (s.endsWith("*")) {
 
if (s.endsWith("*")) {
g2.setColor(Color.red);
+
g2.setColor(red);
 
} else if (s.endsWith("+")) {
 
} else if (s.endsWith("+")) {
g2.setColor(Color.orange);
+
g2.setColor(orange);
 
} else if (s.endsWith("#")) {
 
} else if (s.endsWith("#")) {
g2.setColor(Color.green);
+
g2.setColor(green);
 
}
 
}
 
if (s.length() > 1) {
 
if (s.length() > 1) {
Line 79: Line 231:
 
}
 
}
 
}
 
}
g2.setColor(Color.black);
+
g2.setColor(Color.BLACK);
  +
 
if (effect != null) {
 
if (effect != null) {
  +
//only if distance is big
  +
if (((80 + offset * 6) - (100 + offset * count)) > 20) {
  +
g2.setColor(Color.LIGHT_GRAY);
  +
Stroke origStroke = g2.getStroke();
  +
//stroke for better viewability
  +
Stroke s = new BasicStroke(
  +
0.5f, // Width
  +
BasicStroke.CAP_SQUARE, // End cap
  +
BasicStroke.JOIN_MITER, // Join style
  +
10.0f, // Miter limit
  +
new float[]{21.0f, 9.0f, 3.0f, 9.0f}, // Dash pattern
  +
0.0f); // Dash phase
  +
g2.setStroke(s);
  +
g2.drawLine(90 + offset * count, paintpos, 80 + offset * 6, paintpos);
  +
//revert line
  +
g2.setStroke(origStroke);
  +
g2.setColor(Color.BLACK);
  +
}
 
if (effect.equals("*")) {
 
if (effect.equals("*")) {
g2.setColor(Color.red);
+
g2.setColor(red);
 
g2.drawString("conserved, probably disease effect", 90 + offset * 6, paintpos);
 
g2.drawString("conserved, probably disease effect", 90 + offset * 6, paintpos);
 
} else if (effect.equals("+")) {
 
} else if (effect.equals("+")) {
g2.setColor(Color.green);
+
g2.setColor(green);
 
g2.drawString("not conserved, little effect", 90 + offset * 6, paintpos);
 
g2.drawString("not conserved, little effect", 90 + offset * 6, paintpos);
 
}
 
}
Line 98: Line 269:
 
}
 
}
 
}
 
}
 
 
</source>
 
</source>
  +
  +
  +
[[Category: Phenylketonuria 2012]]

Latest revision as of 11:56, 29 August 2012

Phenylketonuria»Researching and mapping point mutations»journal

Preparing the Data

HGMD

<source lang="python">

  1. /usr/bin/env python
  2. translate hgmd mutations into standard coding (xxxW>M)
  3. input copy-and-pasted from search results after PAH on www.hgmd.org. No download/non-html file found...

import sys

snpsfile = sys.argv[1] snps = {}

for line in open(snpsfile):

   codons, position = line.split()[1:3]
   offset = 0
   base1, base2 = None, None
   codons.replace("a","").replace("t","").replace("c","").replace("g","")
   for i in xrange(3):
       offset+=1
       if codons[i]!=codons[i+4]:
           base1 = codons[i]
           base2 = codons[i+4]
           break
   code = str((int(position)-1)*3+offset)+base1+">"+base2
   print code
   snps[code] = True 

</source>

SNPdbe

<source lang="python">

  1. !/usr/bin/env python
  2. snpsfile is the text-file available from the search on http://www.rostlab.org/services/snpdbe/

import sys

snpsfile = sys.argv[1] outfile = "SNPdbe/SNPs_effect" out = open(outfile,'w')

for line in open(snpsfile):

   fields = line.split('|')
   effect = "effect"
   if fields[20].strip().startswith("Pssm wt"):
       effect = "Effect_estimate"  
   elif int(fields[20])<=12:
       effect = "no_effect"
   out.write(fields[0].strip()+"\t"+effect+"\n")

out.close() </source>

Text-Map

The following python script produces the text-only version of our map from source files compiled from three databases.

<source lang="python">

  1. /usr/bin/env python

from collections import defaultdict

dna_mutations = defaultdict(dict) aa_mutations = defaultdict(dict)

disease = "*" no_disease = "+" silent = "#" comment_string = "# *after SNP: disease causing (HGMD), + after SNP: no effect (missense, but not in HGMD),\

# after SNP: silent mutation, single *: conserved, probably disease effect (SNPdbe), single +: little conserved, probably no effect (SNPdbe)\n#rows tab separated\nAA\tSNPs\tEffect from SNPdbe"


hgmd = "HGMD/hgmd_snps" dbSNP_silent = "dbSNP/dbSNP_synonymous" dbSNP_missense = "dbSNP/dbSNP_missense" SNPdbe = "SNPdbe/SNPs_effect"

def read_input(dna_mutations, aa_mutations):

   for line in open(hgmd):
       position, mutation = line[:-4],line[-4:].strip()
       position = int(position)
       if dna_mutations.has_key(position):
           dna_mutations[position]["hgmd"].append(mutation)
       else:
           dna_mutations[position]["hgmd"] = [mutation]


   for line in open(dbSNP_silent):
       position, mutation = line[:-4],line[-4:].strip()
       position = int(position)
       if dna_mutations[position].has_key("dbSNP_silent"):
           dna_mutations[position]["dbSNP_silent"].append(mutation)
       else:
           dna_mutations[position]["dbSNP_silent"] = [mutation]


   for line in open(dbSNP_missense):
       position, mutation = line[:-4],line[-4:].strip()
       position = int(position)
       if dna_mutations[position].has_key("dbSNP_missense"):
           dna_mutations[position]["dbSNP_missense"].append(mutation)
       else:
           dna_mutations[position]["dbSNP_missense"] = [mutation]
   for line in open(SNPdbe):
       mut, effect = line.split()[0],line.split()[1].strip()
       position = int(mut[1:-1])
       mutation = "%s->%s"%(mut[0],mut[-1])
       if aa_mutations[position].has_key(effect):
           aa_mutations[position][effect].append(mutation)
       else:
           aa_mutations[position][effect] = [mutation]
   return dna_mutations, aa_mutations

def output():

   out = ""
   for i in xrange(1,454):
       aa = "%s\t"%i
       for pos in range((i-1)*3+1, (i-1)*3+4):
           m = ""
           if dna_mutations.has_key(pos):
               if dna_mutations[pos].has_key("hgmd"):
                   for mutation in dna_mutations[pos]["hgmd"]:
                       m += "%s%s%s "%(pos,mutation,disease)#* for disease causing
               if dna_mutations[pos].has_key("dbSNP_missense"):
                   for mutation in dna_mutations[pos]["dbSNP_missense"]:
                       if not "%s%s"%(pos,mutation) in m:
                           m += "%s%s%s "%(pos,mutation, no_disease)#+for /not/ disease causing
               if dna_mutations[pos].has_key("dbSNP_silent"):
                   for mutation in dna_mutations[pos]["dbSNP_silent"]:
                       if not "%s%s"%(pos,mutation) in m:
                           m += "%s%s%s "%(pos,mutation, silent)## for silent
           if m:
               aa += m
       if aa[-1]==" ":
           aa = aa[:-1]
       aa += "\t"
       if aa_mutations.has_key(i):
           for k in aa_mutations[i].keys():
               if k == "effect":
                   aa += disease
               else:
                   aa += no_disease
       out += aa.strip()+"\n"
   return out

dna_mutations, aa_mutations = read_input(dna_mutations, aa_mutations) print comment_string print output(), </source>

Plot of the mapping

We used the java library Graphics2D here and apart from some parsing, and preprocessing of the data, the plot is pretty much the following class. For the complete sourcecode please write to Sebastian.

<source lang="java">

import java.awt.BasicStroke; import java.awt.Color; import java.awt.Font; import java.awt.Graphics; import java.awt.Graphics2D; import java.awt.Stroke; import java.awt.geom.RoundRectangle2D; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; import javax.imageio.ImageIO;

/**

* Created on 09.06.2012, 15:03:25
* @author Sebastian Hollizeck <sebastian.hollizeck@campus.lmu.de>
*/

public class MapPainter {

   public static void paint(LinkedList<Residue> input, String output) throws IOException {
       int multi = 10;
       //get maximal residue position
       Residue last = input.peekLast();
       int max = last.getPosition();
       max *= multi;
       //drawing default picture;
       BufferedImage off_Image = new BufferedImage(900, max + 150, BufferedImage.TYPE_INT_ARGB);
       Graphics2D g2 = off_Image.createGraphics();
       g2.setBackground(Color.white);
       g2.setColor(Color.white);
       g2.fillRect(0, 0, max + 100, max + 150);
       RoundRectangle2D sequence = new RoundRectangle2D.Double(70, 100, 20, max, 10, 10);
       g2.setColor(Color.black);
       g2.draw(sequence);
       Font original = g2.getFont();
       Color green = new Color(0, 100, 0);
       Color red = Color.RED;
       Color orange = new Color(255, 165, 0);
       //print caption
       Font caption = new Font("SanSerif", Font.BOLD, 24);
       g2.setFont(caption);
       g2.drawString("SNP mapping for PAH", 300, 50);
       //revert font
       g2.setFont(original);
       for (Residue r : input) {
           int pos = r.getPosition();
           ArrayList<String> muts = r.getMutations();
           String effect = r.getEffect();
           int paintpos = 100 + pos * multi;
           if (pos % 10 == 0) {
               g2.drawString(pos + "", 40, paintpos);
           }
           if (muts == null && effect == null) {
               //muts is only null if nothing but the position is in the file;
               continue;
           }
           g2.drawLine(70, paintpos, 140, paintpos);
           int offset = 75;
           int count = 1;
           for (String s : muts) {
               if (s.endsWith("*")) {
                   g2.setColor(red);
               } else if (s.endsWith("+")) {
                   g2.setColor(orange);
               } else if (s.endsWith("#")) {
                   g2.setColor(green);
               }
               if (s.length() > 1) {
                   String mut = s.substring(0, s.length() - 1);
                   g2.drawString(mut, 90 + offset * count, paintpos);
                   count++;
               }
           }
           g2.setColor(Color.BLACK);
           if (effect != null) {
               //only if distance is big
               if (((80 + offset * 6) - (100 + offset * count)) > 20) {
                   g2.setColor(Color.LIGHT_GRAY);
                   Stroke origStroke = g2.getStroke();
                   //stroke for better viewability
                   Stroke s = new BasicStroke(
                           0.5f, // Width
                           BasicStroke.CAP_SQUARE, // End cap
                           BasicStroke.JOIN_MITER, // Join style
                           10.0f, // Miter limit
                           new float[]{21.0f, 9.0f, 3.0f, 9.0f}, // Dash pattern
                           0.0f);                     // Dash phase
                   g2.setStroke(s);
                   g2.drawLine(90 + offset * count, paintpos, 80 + offset * 6, paintpos);
                   //revert line
                   g2.setStroke(origStroke);
                   g2.setColor(Color.BLACK);
               }
               if (effect.equals("*")) {
                   g2.setColor(red);
                   g2.drawString("conserved, probably disease effect", 90 + offset * 6, paintpos);
               } else if (effect.equals("+")) {
                   g2.setColor(green);
                   g2.drawString("not conserved, little effect", 90 + offset * 6, paintpos);
               }
           }
           g2.setColor(Color.black);
       }


       File out = new File(output);
       ImageIO.write(off_Image, "png", out);
   }

} </source>