Difference between revisions of "Task5 Hemochromatosis Protocol"

From Bioinformatikpedia
(SNPdbe)
(SNPdbe)
 
(4 intermediate revisions by the same user not shown)
Line 17: Line 17:
   
 
We used the non-professional version of [http://www.hgmd.org/ HGMD] and looked for SNPs using the gene symbol HFE as query.
 
We used the non-professional version of [http://www.hgmd.org/ HGMD] and looked for SNPs using the gene symbol HFE as query.
  +
  +
In order to get the SNP statistics the following script was used:
  +
  +
<source lang="perl">
  +
#!/usr/local/bin/perl
  +
use strict;
  +
use warnings;
  +
use diagnostics;
  +
use sigtrap;
  +
use autodie;
  +
  +
my $inFile = "HFE.htm";
  +
my $outFile = "HGMD.txt";
  +
  +
  +
open (IN, "<", $inFile) or die ("Could not open input file!");
  +
open (OUT, ">", $outFile) or die ("Could not open output file!");
  +
  +
#print header
  +
print OUT "ID\tType\tCodon change\tAmino acid change\tPosition\tPhenotype\tReference\n";
  +
  +
while (<IN>)
  +
{
  +
my $line = $_;
  +
my $dummy = $line;
  +
  +
$dummy=~s/\s//gi;
  +
  +
if ($dummy ne "")
  +
{
  +
my @content = split(/<\/td>/, $line);
  +
  +
my $id = $content[0];
  +
my $type;
  +
my $codon = $content[1];
  +
my $aa = $content[2];
  +
my $pos = $content[3];
  +
my $phenotype = $content[5];
  +
my $ref = $content[6];
  +
my $link;
  +
  +
$id =~m/>(\w+?)</gi;
  +
$id = $1;
  +
  +
$codon =~m/>([\w-]+?)$/gi;
  +
$codon = $1;
  +
  +
$aa =~m/>([\w-]+?)$/gi;
  +
$aa = $1;
  +
  +
$pos =~m/>(\d+?)$/gi;
  +
$pos = $1;
  +
  +
$phenotype =~m/>([\w\s,?]+?)$/gi;
  +
$phenotype = $1;
  +
  +
$ref =~m/^<.*?href="(.+?)">(.*?)<\/a>/gi;
  +
$link = $1;
  +
$ref = $2;
  +
$ref =~ s/<.*?>//gi;
  +
  +
if ($aa =~m/Term/gi)
  +
{
  +
$type = "Nonsense";
  +
}
  +
elsif ($aa =~m/^(.+?)-(.+?)$/g && $1 eq $2)
  +
{
  +
$type = "Synonymous";
  +
}
  +
else
  +
{
  +
$type = "Non-synonymous";
  +
}
  +
  +
print OUT "$id\t$type\t$codon\t$aa\t$pos\t$phenotype\t[$link $ref]\n";
  +
}
  +
}
  +
  +
close IN;
  +
close OUT;
  +
</source>
   
 
<br style="clear:both;">
 
<br style="clear:both;">
Line 28: Line 109:
   
   
We've searched [http://www.rostlab.org/services/snpdbe/ SNPdbe] for SNPs using [http://www.ncbi.nlm.nih.gov/protein/NP_000401 NP_000401] and [http://www.uniprot.org/uniprot/Q30201 Q30201] as query IDs.
+
We've searched [http://www.rostlab.org/services/snpdbe/ SNPdbe] for SNPs using [http://www.ncbi.nlm.nih.gov/protein/NP_000401 NP_000401], [http://www.uniprot.org/uniprot/Q30201 Q30201], and HFE as query IDs.
 
* Quick-Link: [http://www.rostlab.org/services/snpdbe/dosearch.php?id=name&val=NP_000401&organism2=human&organism1= NP_000401]
 
* Quick-Link: [http://www.rostlab.org/services/snpdbe/dosearch.php?id=name&val=NP_000401&organism2=human&organism1= NP_000401]
 
* Quick-Link: [http://www.rostlab.org/services/snpdbe/dosearch.php?id=name&val=Q30201&organism2=human&organism1= Q30201]
 
* Quick-Link: [http://www.rostlab.org/services/snpdbe/dosearch.php?id=name&val=Q30201&organism2=human&organism1= Q30201]
  +
* Quick-Link: [http://www.rostlab.org/services/snpdbe/dosearch.php?id=name&val=HFE&organism2=human&organism1= HFE]
  +
  +
The SNP data was extracted from the textfiles with a perl script:
  +
  +
<source lang="perl">
  +
#!/usr/local/bin/perl
  +
use strict;
  +
use warnings;
  +
use diagnostics;
  +
use sigtrap;
  +
use autodie;
  +
  +
#my $inFile = "snpsQ30201.txt";
  +
#my $outFile = "snpsQ30201_out.txt";
  +
#my $seqFile = "snpsQ30201_seq.txt";
  +
#my $inFile = "snpsNP000401.txt";
  +
#my $outFile = "snpsNP000401_out.txt";
  +
#my $seqFile = "snpsNP000401_seq.txt";
  +
my $inFile = "snpsHFE.txt";
  +
my $outFile = "snpsHFE_out.txt";
  +
my $seqFile = "snpsHFE_seq.txt";
  +
  +
my %mutationToPos;
  +
my %mutationToLine;
  +
my %uniqueSeq;
  +
  +
  +
open (IN, "<", $inFile) or die;
  +
open (OUT, ">", $outFile) or die;
  +
open (SEQ, ">", $seqFile) or die;
  +
  +
my $line = <IN>;
  +
  +
print OUT "dbSNP ID\tSequence\tType\tAmino acid change\tPosition\tEffect\tExperimental evidence\n";
  +
  +
  +
while (<IN>)
  +
{
  +
my $line = $_;
  +
my @content = split(/\|/, $line);
  +
  +
my $name = $content[0];
  +
my $dbSNP = $content[1];
  +
my $gene = $content[6];
  +
my $protein = $content[7];
  +
my $seq = $content[10];
  +
my $disease = $content[19];
  +
my $wt = $content[21];
  +
my $mt = $content[24];
  +
my $effect = "TODO";
  +
my $evidence = $content[31];
  +
  +
if ($gene eq "HFE")
  +
{
  +
$name=~m/(\D)(\d+?)(\D)/gi;
  +
  +
my $pos = $2;
  +
my $first = oneTo3($1);
  +
my $last = oneTo3($3);
  +
my $type;
  +
  +
if ($first eq $last)
  +
{
  +
$type = "Synonymous";
  +
}
  +
else
  +
{
  +
$type = "Non-synonymous";
  +
}
  +
  +
if ($disease =~m/h(a)*emochromatosis/gi || $wt >= 40)
  +
{
  +
$effect = "Disease causing";
  +
}
  +
else
  +
{
  +
$effect = "No effect";
  +
}
  +
  +
if (defined $uniqueSeq{$protein} && $uniqueSeq{$protein} ne $seq)
  +
{
  +
print "Warning: different sequences for same protein id: $protein";
  +
}
  +
  +
$uniqueSeq{$protein} = $seq;
  +
  +
print OUT "$dbSNP\t$protein\t$type\t$first-$last\t$pos\t$effect\t$evidence\n";
  +
}
  +
}
  +
  +
print SEQ ">sp|Q30201|HFE_HUMAN\n";
  +
print SEQ "...\n"; #sequence removed for protocol site (too long)
  +
  +
for my $key (keys %uniqueSeq)
  +
{
  +
print SEQ ">$key\n";
  +
print SEQ "$uniqueSeq{$key}\n";
  +
}
  +
  +
close IN;
  +
close OUT;
  +
close SEQ;
  +
  +
  +
sub oneTo3
  +
{
  +
my $single = $_[0];
  +
  +
if ($single eq "A"){return "Ala";}
  +
if ($single eq "R"){return "Arg";}
  +
if ($single eq "N"){return "Asn";}
  +
if ($single eq "D"){return "Asp";}
  +
if ($single eq "C"){return "Cys";}
  +
if ($single eq "E"){return "Glu";}
  +
if ($single eq "Q"){return "Gln";}
  +
if ($single eq "G"){return "Gly";}
  +
if ($single eq "H"){return "His";}
  +
if ($single eq "I"){return "Ile";}
  +
if ($single eq "L"){return "Leu";}
  +
if ($single eq "K"){return "Lys";}
  +
if ($single eq "M"){return "Met";}
  +
if ($single eq "F"){return "Phe";}
  +
if ($single eq "P"){return "Pro";}
  +
if ($single eq "S"){return "Ser";}
  +
if ($single eq "T"){return "Thr";}
  +
if ($single eq "W"){return "Trp";}
  +
if ($single eq "Y"){return "Tyr";}
  +
if ($single eq "V"){return "Val";}
  +
  +
return "XXX";
  +
}
  +
</source>
  +
  +
  +
For the HFE search the positions had to be corrected for the reference sequence:
  +
  +
<source lang="perl">
  +
#!/usr/local/bin/perl
  +
use strict;
  +
use warnings;
  +
use diagnostics;
  +
use sigtrap;
  +
use autodie;
  +
  +
my $inFile = "snpsHFE_out.txt";
  +
my $alnFile = "snpsHFE.fasta_aln";
  +
my $outFile = "snpsHFE_cor.txt";
  +
  +
my %lineToPos;
  +
  +
my @refSeq = split(//, "..."); #sequence removed for protocol site (too long)
  +
  +
open (IN, "<", $inFile) or die;
  +
open (OUT, ">", $outFile) or die;
  +
  +
my $header = <IN>;
  +
  +
print OUT $header;
  +
  +
while (<IN>)
  +
{
  +
my $line = $_;
  +
my @content = split(/\t/, $line);
  +
  +
my $mutation = $content[3];
  +
my $pos = $content[4];
  +
my $protein = $content[1];
  +
  +
$pos = makePosMap($protein, $pos);
  +
$content[4] = $pos;
  +
$line = "";
  +
  +
#rebuild line;
  +
for my $item (@content)
  +
{
  +
$line = $line.$item;
  +
  +
if ($item ne $content[scalar(@content)-1])
  +
{
  +
$line = $line."\t";
  +
}
  +
}
  +
  +
print OUT $line;
  +
}
  +
  +
close IN;
  +
close OUT;
  +
  +
  +
  +
sub makePosMap
  +
{
  +
my $protein = $_[0];
  +
my $oldPos = $_[1];
  +
my $seq = "";
  +
  +
open (ALN, "<", $alnFile) or die;
  +
  +
while (<ALN>)
  +
{
  +
my $line = $_;
  +
  +
if ($line =~m/^>$protein/)
  +
{
  +
while ($line = <ALN>)
  +
{
  +
if ($line =~m/^>/)
  +
{
  +
last;
  +
}
  +
else
  +
{
  +
$line =~s/\s//gi;
  +
$seq = $seq.$line;
  +
}
  +
}
  +
last;
  +
}
  +
}
  +
  +
close ALN;
  +
  +
my @chars = split(//, $seq);
  +
my $relPos = 1;
  +
my $newPos = 1;
  +
  +
#assuming no gaps in the MSA's reference sequence
  +
  +
for (my $pos = 0; $pos < scalar(@chars); $pos++)
  +
{
  +
if ($chars[$pos] ne "-")
  +
{
  +
if ($oldPos == $relPos)
  +
{
  +
$newPos = $pos+1;
  +
  +
last;
  +
}
  +
  +
$relPos++;
  +
}
  +
elsif ($oldPos == $relPos)
  +
{
  +
print "Something went wrong! Given position is a gap!\n";
  +
}
  +
}
  +
  +
if ($chars[$newPos-1] ne $refSeq[$newPos-1])
  +
{
  +
print "Residue at mutation position differs from reference sequence! $chars[$newPos]<>$refSeq[$newPos]\n";
  +
}
  +
  +
return $newPos;
  +
}
  +
</source>
  +
  +
  +
In the last step all search results were combined, filtered for unique results, and sorted by their position:
  +
  +
<source lang="perl">
  +
#!/usr/local/bin/perl
  +
use strict;
  +
use warnings;
  +
use diagnostics;
  +
use sigtrap;
  +
use autodie;
  +
  +
my @inFiles = ("snpsNP000401_out.txt", "snpsQ30201_out.txt", "snpsHFE_cor.txt");
  +
my $outFile = "SNPdbe.txt";
  +
  +
my %mutationToID;
  +
my %mutationToSeq;
  +
my %mutationToType;
  +
my %mutationToAA;
  +
my %mutationToPos;
  +
my %mutationToEffect;
  +
my %mutationToEvidence;
  +
  +
  +
for my $file (@inFiles)
  +
{
  +
open (IN, "<", $file) or die;
  +
  +
my $line = <IN>;
  +
my @content;
  +
  +
while (<IN>)
  +
{
  +
$line = $_;
  +
@content = split(/\t/, $line);
  +
  +
my $id = $content[0];
  +
my $seq = $content[1];
  +
my $type = $content[2];
  +
my $aa = $content[3];
  +
my $pos = $content[4];
  +
my $effect = $content[5];
  +
my $evidence = $content[6];
  +
my $unique = $aa.$pos;
  +
  +
$evidence =~s/by//gi;
  +
$evidence =~s/\s//gi;
  +
  +
if (not defined $mutationToID{$unique} && (not($id=~m/N\/A/gi)))
  +
{
  +
$mutationToID{$unique} = $id;
  +
}
  +
  +
if (defined $mutationToSeq{$unique})
  +
{
  +
if ($seq eq "Q30201")
  +
{
  +
$mutationToSeq{$unique} = $seq;
  +
}
  +
elsif ($seq eq "NP_000401" && $mutationToSeq{$unique} ne "Q30201")
  +
{
  +
$mutationToSeq{$unique} = $seq;
  +
}
  +
}
  +
else
  +
{
  +
$mutationToSeq{$unique} = $seq;
  +
}
  +
  +
if (not defined $mutationToType{$unique})
  +
{
  +
$mutationToType{$unique} = $type;
  +
}
  +
  +
if (not defined $mutationToAA{$unique})
  +
{
  +
$mutationToAA{$unique} = $aa;
  +
}
  +
  +
if (not defined $mutationToPos{$unique})
  +
{
  +
$mutationToPos{$unique} = $pos;
  +
}
  +
  +
if ((not defined $mutationToEffect{$unique}) && $effect eq "Disease causing")
  +
{
  +
$mutationToEffect{$unique} = $effect;
  +
}
  +
  +
if ((not defined $mutationToEvidence{$unique}) && ($evidence!~m/Notvalidated/gi))
  +
{
  +
$mutationToEvidence{$unique} = $evidence;
  +
}
  +
elsif (defined $mutationToEvidence{$unique} && ($evidence!~m/Notvalidated/gi))
  +
{
  +
my @tmp1 = split(/,/, $mutationToEvidence{$unique});
  +
my @tmp2 = split(/,/, $evidence);
  +
my %tmp3;
  +
  +
for my $ev1 (@tmp1)
  +
{
  +
$tmp3{$ev1} = 1;
  +
}
  +
  +
for my $ev2 (@tmp2)
  +
{
  +
$tmp3{$ev2} = 1;
  +
}
  +
  +
my $new = "";
  +
  +
for my $key (keys %tmp3)
  +
{
  +
$new = $new.$key.",";
  +
}
  +
  +
chop($new);
  +
  +
$mutationToEvidence{$unique}= $new;
  +
}
  +
}
  +
}
  +
  +
my @keys = sort{$mutationToPos{$a} <=> $mutationToPos{$b}} keys %mutationToPos;
  +
  +
  +
open (OUT, ">", $outFile) or die;
  +
  +
print OUT "dbSNP ID\tSequence\tType\tAmino acid change\tPosition\tEffect\tExperimental evidence\n";
  +
  +
for my $key (@keys)
  +
{
  +
if (not defined $mutationToID{$key})
  +
{
  +
$mutationToID{$key} = "N/A";
  +
}
  +
  +
if (not defined $mutationToEffect{$key})
  +
{
  +
$mutationToEffect{$key} = "No effect";
  +
}
  +
  +
if (not defined $mutationToEvidence{$key})
  +
{
  +
$mutationToEvidence{$key} = "Not validated";
  +
}
  +
  +
print OUT $mutationToID{$key}."\t";
  +
print OUT $mutationToSeq{$key}."\t";
  +
print OUT $mutationToType{$key}."\t";
  +
print OUT $mutationToAA{$key}."\t";
  +
print OUT $mutationToPos{$key}."\t";
  +
print OUT $mutationToEffect{$key}."\t";
  +
print OUT $mutationToEvidence{$key}."\n";
  +
}
  +
  +
close OUT;
  +
</source>
   
 
<br style="clear:both;">
 
<br style="clear:both;">
Line 36: Line 531:
 
=== OMIM ===
 
=== OMIM ===
   
  +
  +
We searched [http://omim.org/ OMIM] for the [http://omim.org/entry/613609 HFE Gene] and looked at the listed [http://omim.org/allelicVariant/613609 allelic variants].
   
 
<br style="clear:both;">
 
<br style="clear:both;">
Line 41: Line 538:
 
=== SNPedia ===
 
=== SNPedia ===
   
  +
  +
For [http://www.snpedia.com/index.php/SNPedia SNPedia] we looked at the SNPs listed under [http://www.snpedia.com/index.php/HFE HFE].
   
 
<br style="clear:both;">
 
<br style="clear:both;">

Latest revision as of 14:10, 10 June 2012

Translation and alignment tools

  • Translation (RNA/DNA->AA): Link
  • Online alignment: Link
  • Reference protein sequence: Q30201


SNP search


HGMD

We used the non-professional version of HGMD and looked for SNPs using the gene symbol HFE as query.

In order to get the SNP statistics the following script was used:

<source lang="perl">

  1. !/usr/local/bin/perl

use strict; use warnings; use diagnostics; use sigtrap; use autodie;

my $inFile = "HFE.htm"; my $outFile = "HGMD.txt";


open (IN, "<", $inFile) or die ("Could not open input file!"); open (OUT, ">", $outFile) or die ("Could not open output file!");

  1. print header

print OUT "ID\tType\tCodon change\tAmino acid change\tPosition\tPhenotype\tReference\n";

while (<IN>) { my $line = $_; my $dummy = $line;

$dummy=~s/\s//gi;

if ($dummy ne "") { my @content = split(/<\/td>/, $line);

my $id = $content[0]; my $type; my $codon = $content[1]; my $aa = $content[2]; my $pos = $content[3]; my $phenotype = $content[5]; my $ref = $content[6]; my $link;

$id =~m/>(\w+?)</gi; $id = $1;

$codon =~m/>([\w-]+?)$/gi; $codon = $1;

$aa =~m/>([\w-]+?)$/gi; $aa = $1;

$pos =~m/>(\d+?)$/gi; $pos = $1;

$phenotype =~m/>([\w\s,?]+?)$/gi; $phenotype = $1;

$ref =~m/^<.*?href="(.+?)">(.*?)<\/a>/gi; $link = $1; $ref = $2; $ref =~ s/<.*?>//gi;

if ($aa =~m/Term/gi) { $type = "Nonsense"; } elsif ($aa =~m/^(.+?)-(.+?)$/g && $1 eq $2) { $type = "Synonymous"; } else { $type = "Non-synonymous"; }

print OUT "$id\t$type\t$codon\t$aa\t$pos\t$phenotype\t[$link $ref]\n"; } }

close IN; close OUT; </source>


dbSNP


SNPdbe

We've searched SNPdbe for SNPs using NP_000401, Q30201, and HFE as query IDs.

The SNP data was extracted from the textfiles with a perl script:

<source lang="perl">

  1. !/usr/local/bin/perl

use strict; use warnings; use diagnostics; use sigtrap; use autodie;

  1. my $inFile = "snpsQ30201.txt";
  2. my $outFile = "snpsQ30201_out.txt";
  3. my $seqFile = "snpsQ30201_seq.txt";
  4. my $inFile = "snpsNP000401.txt";
  5. my $outFile = "snpsNP000401_out.txt";
  6. my $seqFile = "snpsNP000401_seq.txt";

my $inFile = "snpsHFE.txt"; my $outFile = "snpsHFE_out.txt"; my $seqFile = "snpsHFE_seq.txt";

my %mutationToPos; my %mutationToLine; my %uniqueSeq;


open (IN, "<", $inFile) or die; open (OUT, ">", $outFile) or die; open (SEQ, ">", $seqFile) or die;

my $line = <IN>;

print OUT "dbSNP ID\tSequence\tType\tAmino acid change\tPosition\tEffect\tExperimental evidence\n";


while (<IN>) { my $line = $_; my @content = split(/\|/, $line);

my $name = $content[0]; my $dbSNP = $content[1]; my $gene = $content[6]; my $protein = $content[7]; my $seq = $content[10]; my $disease = $content[19]; my $wt = $content[21]; my $mt = $content[24]; my $effect = "TODO"; my $evidence = $content[31];

if ($gene eq "HFE") { $name=~m/(\D)(\d+?)(\D)/gi;

my $pos = $2; my $first = oneTo3($1); my $last = oneTo3($3); my $type;

if ($first eq $last) { $type = "Synonymous"; } else { $type = "Non-synonymous"; }

if ($disease =~m/h(a)*emochromatosis/gi || $wt >= 40) { $effect = "Disease causing"; } else { $effect = "No effect"; }

if (defined $uniqueSeq{$protein} && $uniqueSeq{$protein} ne $seq) { print "Warning: different sequences for same protein id: $protein"; }

$uniqueSeq{$protein} = $seq;

print OUT "$dbSNP\t$protein\t$type\t$first-$last\t$pos\t$effect\t$evidence\n"; } }

print SEQ ">sp|Q30201|HFE_HUMAN\n"; print SEQ "...\n"; #sequence removed for protocol site (too long)

for my $key (keys %uniqueSeq) { print SEQ ">$key\n"; print SEQ "$uniqueSeq{$key}\n"; }

close IN; close OUT; close SEQ;


sub oneTo3 { my $single = $_[0];

if ($single eq "A"){return "Ala";} if ($single eq "R"){return "Arg";} if ($single eq "N"){return "Asn";} if ($single eq "D"){return "Asp";} if ($single eq "C"){return "Cys";} if ($single eq "E"){return "Glu";} if ($single eq "Q"){return "Gln";} if ($single eq "G"){return "Gly";} if ($single eq "H"){return "His";} if ($single eq "I"){return "Ile";} if ($single eq "L"){return "Leu";} if ($single eq "K"){return "Lys";} if ($single eq "M"){return "Met";} if ($single eq "F"){return "Phe";} if ($single eq "P"){return "Pro";} if ($single eq "S"){return "Ser";} if ($single eq "T"){return "Thr";} if ($single eq "W"){return "Trp";} if ($single eq "Y"){return "Tyr";} if ($single eq "V"){return "Val";}

return "XXX"; } </source>


For the HFE search the positions had to be corrected for the reference sequence:

<source lang="perl">

  1. !/usr/local/bin/perl

use strict; use warnings; use diagnostics; use sigtrap; use autodie;

my $inFile = "snpsHFE_out.txt"; my $alnFile = "snpsHFE.fasta_aln"; my $outFile = "snpsHFE_cor.txt";

my %lineToPos;

my @refSeq = split(//, "..."); #sequence removed for protocol site (too long)

open (IN, "<", $inFile) or die; open (OUT, ">", $outFile) or die;

my $header = <IN>;

print OUT $header;

while (<IN>) { my $line = $_; my @content = split(/\t/, $line);

my $mutation = $content[3]; my $pos = $content[4]; my $protein = $content[1];

$pos = makePosMap($protein, $pos); $content[4] = $pos; $line = "";

#rebuild line; for my $item (@content) { $line = $line.$item;

if ($item ne $content[scalar(@content)-1]) { $line = $line."\t"; } }

print OUT $line; }

close IN; close OUT;


sub makePosMap { my $protein = $_[0]; my $oldPos = $_[1]; my $seq = "";

open (ALN, "<", $alnFile) or die;

while (<ALN>) { my $line = $_;

if ($line =~m/^>$protein/) { while ($line = <ALN>) { if ($line =~m/^>/) { last; } else { $line =~s/\s//gi; $seq = $seq.$line; } } last; } }

close ALN;

my @chars = split(//, $seq); my $relPos = 1; my $newPos = 1;

#assuming no gaps in the MSA's reference sequence

for (my $pos = 0; $pos < scalar(@chars); $pos++) { if ($chars[$pos] ne "-") { if ($oldPos == $relPos) { $newPos = $pos+1;

last; }

$relPos++; } elsif ($oldPos == $relPos) { print "Something went wrong! Given position is a gap!\n"; } }

if ($chars[$newPos-1] ne $refSeq[$newPos-1]) { print "Residue at mutation position differs from reference sequence! $chars[$newPos]<>$refSeq[$newPos]\n"; }

return $newPos; } </source>


In the last step all search results were combined, filtered for unique results, and sorted by their position:

<source lang="perl">

  1. !/usr/local/bin/perl

use strict; use warnings; use diagnostics; use sigtrap; use autodie;

my @inFiles = ("snpsNP000401_out.txt", "snpsQ30201_out.txt", "snpsHFE_cor.txt"); my $outFile = "SNPdbe.txt";

my %mutationToID; my %mutationToSeq; my %mutationToType; my %mutationToAA; my %mutationToPos; my %mutationToEffect; my %mutationToEvidence;


for my $file (@inFiles) { open (IN, "<", $file) or die;

my $line = <IN>; my @content;

while (<IN>) { $line = $_; @content = split(/\t/, $line);

my $id = $content[0]; my $seq = $content[1]; my $type = $content[2]; my $aa = $content[3]; my $pos = $content[4]; my $effect = $content[5]; my $evidence = $content[6]; my $unique = $aa.$pos;

$evidence =~s/by//gi; $evidence =~s/\s//gi;

if (not defined $mutationToID{$unique} && (not($id=~m/N\/A/gi))) { $mutationToID{$unique} = $id; }

if (defined $mutationToSeq{$unique}) { if ($seq eq "Q30201") { $mutationToSeq{$unique} = $seq; } elsif ($seq eq "NP_000401" && $mutationToSeq{$unique} ne "Q30201") { $mutationToSeq{$unique} = $seq; } } else { $mutationToSeq{$unique} = $seq; }

if (not defined $mutationToType{$unique}) { $mutationToType{$unique} = $type; }

if (not defined $mutationToAA{$unique}) { $mutationToAA{$unique} = $aa; }

if (not defined $mutationToPos{$unique}) { $mutationToPos{$unique} = $pos; }

if ((not defined $mutationToEffect{$unique}) && $effect eq "Disease causing") { $mutationToEffect{$unique} = $effect; }

if ((not defined $mutationToEvidence{$unique}) && ($evidence!~m/Notvalidated/gi)) { $mutationToEvidence{$unique} = $evidence; } elsif (defined $mutationToEvidence{$unique} && ($evidence!~m/Notvalidated/gi)) { my @tmp1 = split(/,/, $mutationToEvidence{$unique}); my @tmp2 = split(/,/, $evidence); my %tmp3;

for my $ev1 (@tmp1) { $tmp3{$ev1} = 1; }

for my $ev2 (@tmp2) { $tmp3{$ev2} = 1; }

my $new = "";

for my $key (keys %tmp3) { $new = $new.$key.","; }

chop($new);

$mutationToEvidence{$unique}= $new; } } }

my @keys = sort{$mutationToPos{$a} <=> $mutationToPos{$b}} keys %mutationToPos;


open (OUT, ">", $outFile) or die;

print OUT "dbSNP ID\tSequence\tType\tAmino acid change\tPosition\tEffect\tExperimental evidence\n";

for my $key (@keys) { if (not defined $mutationToID{$key}) { $mutationToID{$key} = "N/A"; }

if (not defined $mutationToEffect{$key}) { $mutationToEffect{$key} = "No effect"; }

if (not defined $mutationToEvidence{$key}) { $mutationToEvidence{$key} = "Not validated"; }

print OUT $mutationToID{$key}."\t"; print OUT $mutationToSeq{$key}."\t"; print OUT $mutationToType{$key}."\t"; print OUT $mutationToAA{$key}."\t"; print OUT $mutationToPos{$key}."\t"; print OUT $mutationToEffect{$key}."\t"; print OUT $mutationToEvidence{$key}."\n"; }

close OUT; </source>


OMIM

We searched OMIM for the HFE Gene and looked at the listed allelic variants.


SNPedia

For SNPedia we looked at the SNPs listed under HFE.


Mapping