Package Bio :: Package Wise :: Module dnal
[hide private]
[frames] | no frames]

Source Code for Module Bio.Wise.dnal

  1  #!/usr/bin/env python 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bio.Wise contains modules for running and processing the output of 
  7  # some of the models in the Wise2 package by Ewan Birney available from: 
  8  # ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ 
  9  # http://www.ebi.ac.uk/Wise2/ 
 10  #  
 11  # Bio.Wise.psw is for protein Smith-Waterman alignments 
 12  # Bio.Wise.dnal is for Smith-Waterman DNA alignments 
 13   
 14  from __future__ import division 
 15   
 16  __version__ = "$Revision: 1.11 $" 
 17   
 18  import commands 
 19  import itertools 
 20  import os 
 21  import re 
 22   
 23  from Bio import Wise 
 24   
 25  _SCORE_MATCH = 4 
 26  _SCORE_MISMATCH = -1 
 27  _SCORE_GAP_START = -5 
 28  _SCORE_GAP_EXTENSION = -1 
 29   
 30  _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"] 
 31   
32 -def _build_dnal_cmdline(match, mismatch, gap, extension):
33 res = _CMDLINE_DNAL[:] 34 res.extend(["-match", str(match)]) 35 res.extend(["-mis", str(mismatch)]) 36 res.extend(["-gap", str(-gap)]) # negative: convert score to penalty 37 res.extend(["-ext", str(-extension)]) # negative: convert score to penalty 38 39 return res
40 41 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
42 -def _fgrep_count(pattern, file):
43 return int(commands.getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
44 45 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
46 -def _alb_line2coords(line):
47 return tuple([int(coord)+1 # one-based -> zero-based 48 for coord 49 in _re_alb_line2coords.match(line).groups()])
50
51 -def _get_coords(filename):
52 alb = file(filename) 53 54 start_line = None 55 end_line = None 56 57 for line in alb: 58 if line.startswith("["): 59 if not start_line: 60 start_line = line # rstrip not needed 61 else: 62 end_line = line 63 64 if end_line is None: # sequence is too short 65 return [(0, 0), (0, 0)] 66 67 return zip(*map(_alb_line2coords, [start_line, end_line])) # returns [(start0, end0), (start1, end1)]
68
69 -def _any(seq, pred=bool):
70 "Returns True if pred(x) is True at least one element in the iterable" 71 return True in itertools.imap(pred, seq)
72
73 -class Statistics(object):
74 """ 75 Calculate statistics from an ALB report 76 """
77 - def __init__(self, filename, match, mismatch, gap, extension):
78 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename) 79 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename) 80 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename) 81 82 if gap == extension: 83 self.extensions = 0 84 else: 85 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename) 86 87 self.score = (match*self.matches + 88 mismatch*self.mismatches + 89 gap*self.gaps + 90 extension*self.extensions) 91 92 if _any([self.matches, self.mismatches, self.gaps, self.extensions]): 93 self.coords = _get_coords(filename) 94 else: 95 self.coords = [(0, 0), (0,0)]
96
97 - def identity_fraction(self):
98 return self.matches/(self.matches+self.mismatches)
99 100 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions" 101
102 - def __str__(self):
103 return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
104
105 -def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
106 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension) 107 temp_file = Wise.align(cmdline, pair, **keywds) 108 try: 109 return Statistics(temp_file.name, match, mismatch, gap, extension) 110 except AttributeError: 111 try: 112 keywds['dry_run'] 113 return None 114 except KeyError: 115 raise
116
117 -def main():
118 import sys 119 stats = align(sys.argv[1:3]) 120 print "\n".join(["%s: %s" % (attr, getattr(stats, attr)) 121 for attr in 122 ("matches", "mismatches", "gaps", "extensions")]) 123 print "identity_fraction: %s" % stats.identity_fraction() 124 print "coords: %s" % stats.coords
125
126 -def _test(*args, **keywds):
127 import doctest, sys 128 doctest.testmod(sys.modules[__name__], *args, **keywds)
129 130 if __name__ == "__main__": 131 if __debug__: 132 _test() 133 main() 134