Msa-conservation.py
Source code
<source lang="python">
- !/usr/bin/python
- compute conservation (relative frequency) of residues
- in a multiple sequence alignment
- Author: Shen Wei
- Usage: msa-conservation.py <msa_in_FASTA_format> <comma_separated_positions>
import sys from Bio import AlignIO import re
if __name__ == '__main__': try: aln = AlignIO.read(file(sys.argv[1]), 'fasta') positions = map(int, sys.argv[2].split(',')) indices = range(0, len(positions)) freqs = [0] * len(positions) residues = [] * len(positions) substrs = [] seqs = 0 alnLen = aln.get_alignment_length() query = None for record in aln: # query sequence is the first entry # remove insertions and record sub-string information if query == None: query = recordseq = str(record.seq) for match in re.finditer('\w+', recordseq): substrs.append([match.start(), match.end()]) query = query + recordseq[match.start() : match.end()] continue #count frequency for residues in homolog sequences seqs = seqs + 1 for idx in indices: pos = positions[idx] - 1 original = query[pos] recordseq = for substr in substrs: recordseq = recordseq + record.seq[substr[0] : substr[1]] new = recordseq[pos] if residues[idx] == : residues[idx] = query[pos] if new == original: freqs[idx] = freqs[idx] + 1 # compute frequency freqs = map(lambda x: "%0.2f" % (float(x) / seqs), freqs) # print out selected residues, positions and conservation print '\t'.join(residues) print '\t'.join(map(str, positions)) print '\t'.join(freqs) except IndexError: print 'msa-conservation.py msa.fasta pos1,pos2,...' </source>