Difference between revisions of "PKU journal Task5"
Line 1: | Line 1: | ||
[[Phenylketonuria]]»[[Researching_SNPs_(PKU)|Researching and mapping point mutations]]»journal |
[[Phenylketonuria]]»[[Researching_SNPs_(PKU)|Researching and mapping point mutations]]»journal |
||
+ | |||
+ | == Preparing the Data== |
||
+ | ===HGMD=== |
||
+ | <source lang="python"> |
||
+ | #/usr/bin/env python |
||
+ | #translate hgmd mutations into standard coding (xxxW>M) |
||
+ | #input copy-and-pasted from search results after PAH on www.hgmd.org. No download/non-html file found... |
||
+ | |||
+ | import sys |
||
+ | |||
+ | snpsfile = sys.argv[1] |
||
+ | snps = {} |
||
+ | |||
+ | for line in open(snpsfile): |
||
+ | codons, position = line.split()[1:3] |
||
+ | offset = 0 |
||
+ | base1, base2 = None, None |
||
+ | codons.replace("a","").replace("t","").replace("c","").replace("g","") |
||
+ | for i in xrange(3): |
||
+ | offset+=1 |
||
+ | if codons[i]!=codons[i+4]: |
||
+ | base1 = codons[i] |
||
+ | base2 = codons[i+4] |
||
+ | break |
||
+ | code = str((int(position)-1)*3+offset)+base1+">"+base2 |
||
+ | print code |
||
+ | snps[code] = True |
||
+ | </source> |
||
+ | |||
+ | ===SNPdbe=== |
||
+ | <source lang="python"> |
||
+ | #!/usr/bin/env python |
||
+ | #snpsfile is the text-file available from the search on http://www.rostlab.org/services/snpdbe/ |
||
+ | import sys |
||
+ | |||
+ | snpsfile = sys.argv[1] |
||
+ | outfile = "SNPdbe/SNPs_effect" |
||
+ | out = open(outfile,'w') |
||
+ | |||
+ | for line in open(snpsfile): |
||
+ | fields = line.split('|') |
||
+ | effect = "effect" |
||
+ | if fields[20].strip().startswith("Pssm wt"): |
||
+ | effect = "Effect_estimate" |
||
+ | elif int(fields[20])<=12: |
||
+ | effect = "no_effect" |
||
+ | out.write(fields[0].strip()+"\t"+effect+"\n") |
||
+ | out.close() |
||
+ | </source> |
||
+ | |||
+ | ===Text-Map=== |
||
+ | The following python script produces the text-only version of our map from source files compiled from three databases. |
||
+ | |||
+ | <source lang="python"> |
||
+ | #/usr/bin/env python |
||
+ | from collections import defaultdict |
||
+ | |||
+ | dna_mutations = defaultdict(dict) |
||
+ | aa_mutations = defaultdict(dict) |
||
+ | |||
+ | disease = "*" |
||
+ | no_disease = "+" |
||
+ | silent = "#" |
||
+ | comment_string = "# *after SNP: disease causing (HGMD), + after SNP: no effect (missense, but not in HGMD),\ |
||
+ | # after SNP: silent mutation, single *: conserved, probably disease effect (SNPdbe), single +: little conserved, probably no effect (SNPdbe)\n#rows tab separated\nAA\tSNPs\tEffect from SNPdbe" |
||
+ | |||
+ | |||
+ | hgmd = "HGMD/hgmd_snps" |
||
+ | dbSNP_silent = "dbSNP/dbSNP_synonymous" |
||
+ | dbSNP_missense = "dbSNP/dbSNP_missense" |
||
+ | SNPdbe = "SNPdbe/SNPs_effect" |
||
+ | |||
+ | def read_input(dna_mutations, aa_mutations): |
||
+ | for line in open(hgmd): |
||
+ | position, mutation = line[:-4],line[-4:].strip() |
||
+ | position = int(position) |
||
+ | if dna_mutations.has_key(position): |
||
+ | dna_mutations[position]["hgmd"].append(mutation) |
||
+ | else: |
||
+ | dna_mutations[position]["hgmd"] = [mutation] |
||
+ | |||
+ | |||
+ | for line in open(dbSNP_silent): |
||
+ | position, mutation = line[:-4],line[-4:].strip() |
||
+ | position = int(position) |
||
+ | if dna_mutations[position].has_key("dbSNP_silent"): |
||
+ | dna_mutations[position]["dbSNP_silent"].append(mutation) |
||
+ | else: |
||
+ | dna_mutations[position]["dbSNP_silent"] = [mutation] |
||
+ | |||
+ | |||
+ | for line in open(dbSNP_missense): |
||
+ | position, mutation = line[:-4],line[-4:].strip() |
||
+ | position = int(position) |
||
+ | if dna_mutations[position].has_key("dbSNP_missense"): |
||
+ | dna_mutations[position]["dbSNP_missense"].append(mutation) |
||
+ | else: |
||
+ | dna_mutations[position]["dbSNP_missense"] = [mutation] |
||
+ | |||
+ | for line in open(SNPdbe): |
||
+ | mut, effect = line.split()[0],line.split()[1].strip() |
||
+ | position = int(mut[1:-1]) |
||
+ | mutation = "%s->%s"%(mut[0],mut[-1]) |
||
+ | if aa_mutations[position].has_key(effect): |
||
+ | aa_mutations[position][effect].append(mutation) |
||
+ | else: |
||
+ | aa_mutations[position][effect] = [mutation] |
||
+ | |||
+ | return dna_mutations, aa_mutations |
||
+ | |||
+ | def output(): |
||
+ | out = "" |
||
+ | for i in xrange(1,454): |
||
+ | aa = "%s\t"%i |
||
+ | for pos in range((i-1)*3+1, (i-1)*3+4): |
||
+ | m = "" |
||
+ | if dna_mutations.has_key(pos): |
||
+ | if dna_mutations[pos].has_key("hgmd"): |
||
+ | for mutation in dna_mutations[pos]["hgmd"]: |
||
+ | m += "%s%s%s "%(pos,mutation,disease)#* for disease causing |
||
+ | if dna_mutations[pos].has_key("dbSNP_missense"): |
||
+ | for mutation in dna_mutations[pos]["dbSNP_missense"]: |
||
+ | if not "%s%s"%(pos,mutation) in m: |
||
+ | m += "%s%s%s "%(pos,mutation, no_disease)#+for /not/ disease causing |
||
+ | if dna_mutations[pos].has_key("dbSNP_silent"): |
||
+ | for mutation in dna_mutations[pos]["dbSNP_silent"]: |
||
+ | if not "%s%s"%(pos,mutation) in m: |
||
+ | m += "%s%s%s "%(pos,mutation, silent)## for silent |
||
+ | if m: |
||
+ | aa += m |
||
+ | if aa[-1]==" ": |
||
+ | aa = aa[:-1] |
||
+ | aa += "\t" |
||
+ | if aa_mutations.has_key(i): |
||
+ | for k in aa_mutations[i].keys(): |
||
+ | if k == "effect": |
||
+ | aa += disease |
||
+ | else: |
||
+ | aa += no_disease |
||
+ | out += aa.strip()+"\n" |
||
+ | return out |
||
+ | |||
+ | dna_mutations, aa_mutations = read_input(dna_mutations, aa_mutations) |
||
+ | print comment_string |
||
+ | print output(), |
||
+ | </source> |
||
== Plot of the mapping == |
== Plot of the mapping == |
Revision as of 00:33, 10 June 2012
Phenylketonuria»Researching and mapping point mutations»journal
Preparing the Data
HGMD
<source lang="python">
- /usr/bin/env python
- translate hgmd mutations into standard coding (xxxW>M)
- input copy-and-pasted from search results after PAH on www.hgmd.org. No download/non-html file found...
import sys
snpsfile = sys.argv[1] snps = {}
for line in open(snpsfile):
codons, position = line.split()[1:3] offset = 0 base1, base2 = None, None codons.replace("a","").replace("t","").replace("c","").replace("g","") for i in xrange(3): offset+=1 if codons[i]!=codons[i+4]: base1 = codons[i] base2 = codons[i+4] break code = str((int(position)-1)*3+offset)+base1+">"+base2 print code snps[code] = True
</source>
SNPdbe
<source lang="python">
- !/usr/bin/env python
- snpsfile is the text-file available from the search on http://www.rostlab.org/services/snpdbe/
import sys
snpsfile = sys.argv[1] outfile = "SNPdbe/SNPs_effect" out = open(outfile,'w')
for line in open(snpsfile):
fields = line.split('|') effect = "effect" if fields[20].strip().startswith("Pssm wt"): effect = "Effect_estimate" elif int(fields[20])<=12: effect = "no_effect" out.write(fields[0].strip()+"\t"+effect+"\n")
out.close() </source>
Text-Map
The following python script produces the text-only version of our map from source files compiled from three databases.
<source lang="python">
- /usr/bin/env python
from collections import defaultdict
dna_mutations = defaultdict(dict) aa_mutations = defaultdict(dict)
disease = "*" no_disease = "+" silent = "#" comment_string = "# *after SNP: disease causing (HGMD), + after SNP: no effect (missense, but not in HGMD),\
# after SNP: silent mutation, single *: conserved, probably disease effect (SNPdbe), single +: little conserved, probably no effect (SNPdbe)\n#rows tab separated\nAA\tSNPs\tEffect from SNPdbe"
hgmd = "HGMD/hgmd_snps"
dbSNP_silent = "dbSNP/dbSNP_synonymous"
dbSNP_missense = "dbSNP/dbSNP_missense"
SNPdbe = "SNPdbe/SNPs_effect"
def read_input(dna_mutations, aa_mutations):
for line in open(hgmd): position, mutation = line[:-4],line[-4:].strip() position = int(position) if dna_mutations.has_key(position): dna_mutations[position]["hgmd"].append(mutation) else: dna_mutations[position]["hgmd"] = [mutation]
for line in open(dbSNP_silent): position, mutation = line[:-4],line[-4:].strip() position = int(position) if dna_mutations[position].has_key("dbSNP_silent"): dna_mutations[position]["dbSNP_silent"].append(mutation) else: dna_mutations[position]["dbSNP_silent"] = [mutation]
for line in open(dbSNP_missense): position, mutation = line[:-4],line[-4:].strip() position = int(position) if dna_mutations[position].has_key("dbSNP_missense"): dna_mutations[position]["dbSNP_missense"].append(mutation) else: dna_mutations[position]["dbSNP_missense"] = [mutation]
for line in open(SNPdbe): mut, effect = line.split()[0],line.split()[1].strip() position = int(mut[1:-1]) mutation = "%s->%s"%(mut[0],mut[-1]) if aa_mutations[position].has_key(effect): aa_mutations[position][effect].append(mutation) else: aa_mutations[position][effect] = [mutation]
return dna_mutations, aa_mutations
def output():
out = "" for i in xrange(1,454): aa = "%s\t"%i for pos in range((i-1)*3+1, (i-1)*3+4): m = "" if dna_mutations.has_key(pos): if dna_mutations[pos].has_key("hgmd"): for mutation in dna_mutations[pos]["hgmd"]: m += "%s%s%s "%(pos,mutation,disease)#* for disease causing if dna_mutations[pos].has_key("dbSNP_missense"): for mutation in dna_mutations[pos]["dbSNP_missense"]: if not "%s%s"%(pos,mutation) in m: m += "%s%s%s "%(pos,mutation, no_disease)#+for /not/ disease causing if dna_mutations[pos].has_key("dbSNP_silent"): for mutation in dna_mutations[pos]["dbSNP_silent"]: if not "%s%s"%(pos,mutation) in m: m += "%s%s%s "%(pos,mutation, silent)## for silent if m: aa += m if aa[-1]==" ": aa = aa[:-1] aa += "\t" if aa_mutations.has_key(i): for k in aa_mutations[i].keys(): if k == "effect": aa += disease else: aa += no_disease out += aa.strip()+"\n" return out
dna_mutations, aa_mutations = read_input(dna_mutations, aa_mutations) print comment_string print output(), </source>
Plot of the mapping
We used the java library Graphics2D here and apart from some parsing, and preprocessing of the data, the plot is pretty much the following class. For the complete sourcecode please write to Sebastian.
<source lang="java"> package SNPMapping;
import java.awt.Color; import java.awt.Font; import java.awt.Graphics; import java.awt.Graphics2D; import java.awt.geom.RoundRectangle2D; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; import javax.imageio.ImageIO;
/**
* Created on 09.06.2012, 15:03:25 * @author Sebastian Hollizeck <sebastian.hollizeck@campus.lmu.de> */
public class MapPainter {
public static void paint(LinkedList<Residue> input,String output) throws IOException { int multi = 10; //get maximal residue position Residue last = input.peekLast(); int max = last.getPosition(); max *= multi; //drawing default picture; BufferedImage off_Image = new BufferedImage(900, max + 150, BufferedImage.TYPE_INT_ARGB); Graphics2D g2 = off_Image.createGraphics(); g2.setBackground(Color.white); g2.setColor(Color.white); g2.fillRect(0, 0, max + 100, max + 150); RoundRectangle2D sequence = new RoundRectangle2D.Double(70, 100, 20, max, 10, 10); g2.setColor(Color.black); g2.draw(sequence); Font original = g2.getFont(); //print caption Font caption = new Font("SanSerif",Font.BOLD,24); g2.setFont(caption); g2.drawString("SNP mapping for PAH",300,50);
//revert font g2.setFont(original);
for (Residue r : input) { int pos = r.getPosition(); ArrayList<String> muts = r.getMutations(); String effect = r.getEffect(); int paintpos = 100 + pos * multi; if (pos % 10 == 0) { g2.drawString(pos + "", 40, paintpos); } if (muts == null&&effect==null) { //muts is only null if nothing but the position is in the file; continue; }
g2.drawLine(70, paintpos, 140, paintpos); int offset = 75; int count = 1; for (String s : muts) { if (s.endsWith("*")) { g2.setColor(Color.red); } else if (s.endsWith("+")) { g2.setColor(Color.orange); } else if (s.endsWith("#")) { g2.setColor(Color.green); } if (s.length() > 1) { String mut = s.substring(0, s.length() - 1); g2.drawString(mut, 90 + offset * count, paintpos); count++; } } g2.setColor(Color.black); if (effect != null) { if (effect.equals("*")) { g2.setColor(Color.red); g2.drawString("conserved, probably disease effect", 90 + offset * 6, paintpos); } else if (effect.equals("+")) { g2.setColor(Color.green); g2.drawString("not conserved, little effect", 90 + offset * 6, paintpos); } } g2.setColor(Color.black); }
File out = new File(output); ImageIO.write(off_Image, "png", out); }
}
</source>