Trees | Indices | Help |
---|
|
1 # Copyright 2004 by James Casbon. All rights reserved. 2 # This code is part of the Biopython distribution and governed by its 3 # license. Please see the LICENSE file that should have been included 4 # as part of this package. 5 6 """ 7 Code to deal with COMPASS output, a program for profile/profile comparison. 8 9 Compass is described in: 10 11 Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein 12 alignments with assessment of statistical significance. J Mol Biol. 2003 Feb 13 7;326(1):317-36. 14 15 Tested with COMPASS 1.24. 16 17 Classes: 18 Record One result of a compass file 19 _Scanner Scan compass results 20 _Consumer Consume scanner events 21 RecordParser Parse one compass record 22 Iterator Iterate through a number of compass records 23 """ 24 from Bio import File 25 from Bio.ParserSupport import * 26 import re 2729 """ 30 Hold information from one compass hit. 31 Ali1 one is the query, Ali2 the hit. 32 """ 336435 self.query='' 36 self.hit='' 37 self.gap_threshold=0 38 self.query_length=0 39 self.query_filtered_length=0 40 self.query_nseqs=0 41 self.query_neffseqs=0 42 self.hit_length=0 43 self.hit_filtered_length=0 44 self.hit_nseqs=0 45 self.hit_neffseqs=0 46 self.sw_score=0 47 self.evalue=-1 48 self.query_start=-1 49 self.hit_start=-1 50 self.query_aln='' 51 self.hit_aln='' 52 self.positives=''53 5456 """Return the length of the query covered in alignment""" 57 s = self.query_aln.replace("=", "") 58 return len(s)5966 """Reads compass output and generate events""" 6714369 """Feed in COMPASS ouput""" 70 71 if isinstance(handle, File.UndoHandle): 72 pass 73 else: 74 handle = File.UndoHandle(handle) 75 76 77 assert isinstance(handle, File.UndoHandle), \ 78 "handle must be an UndoHandle" 79 if handle.peekline(): 80 self._scan_record(handle, consumer)81 8284 self._scan_names(handle, consumer) 85 self._scan_threshold(handle, consumer) 86 self._scan_lengths(handle,consumer) 87 self._scan_profilewidth(handle, consumer) 88 self._scan_scores(handle,consumer) 89 self._scan_alignment(handle,consumer)9092 """ 93 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 94 """ 95 read_and_call(handle, consumer.names, contains="Ali1:")9698 """ 99 Threshold of effective gap content in columns: 0.5 100 """ 101 read_and_call(handle, consumer.threshold, start="Threshold")102104 """ 105 length1=388 filtered_length1=386 length2=145 filtered_length2=137 106 """ 107 read_and_call(handle, consumer.lengths, start="length1=")108110 """ 111 Nseqs1=399 Neff1=12.972 Nseqs2=1 Neff2=6.099 112 """ 113 read_and_call(handle, consumer.profilewidth, contains="Nseqs1")114116 """ 117 Smith-Waterman score = 37 Evalue = 5.75e+02 118 """ 119 read_and_call(handle, consumer.scores, start="Smith-Waterman")120122 """ 123 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 125 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 126 127 128 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 130 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 131 132 """ 133 while 1: 134 line = handle.readline() 135 if not line: 136 break 137 if is_blank_line(line): 138 continue 139 else: 140 consumer.query_alignment(line) 141 read_and_call(handle, consumer.positive_alignment) 142 read_and_call(handle, consumer.hit_alignment)145 # all regular expressions used -- compile only once 146 _re_names = re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+") 147 _re_threshold = \ 148 re.compile("Threshold of effective gap content in columns: (\S+)") 149 _re_lengths = \ 150 re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)" 151 + "\s+filtered_length2=(\S+)") 152 _re_profilewidth = \ 153 re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)") 154 _re_scores = re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)") 155 _re_start = re.compile("(\d+)") 156 _re_align = re.compile("^.{15}(\S+)") 157 _re_positive_alignment = re.compile("^.{15}(.+)") 158219160 self.data = None161163 """ 164 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 165 ------query----- -------hit------------- 166 """ 167 self.data = Record() 168 m = self.__class__._re_names.search(line) 169 self.data.query = m.group(1) 170 self.data.hit = m.group(2)171 175177 m = self.__class__._re_lengths.search(line) 178 self.data.query_length = int(m.group(1)) 179 self.data.query_filtered_length = float(m.group(2)) 180 self.data.hit_length = int(m.group(3)) 181 self.data.hit_filtered_length = float(m.group(4))182184 m = self.__class__._re_profilewidth.search(line) 185 self.data.query_nseqs = int(m.group(1)) 186 self.data.query_neffseqs = float(m.group(2)) 187 self.data.hit_nseqs = int(m.group(3)) 188 self.data.hit_neffseqs = float(m.group(4))189191 m = self.__class__._re_scores.search(line) 192 if m: 193 self.data.sw_score = int(m.group(1)) 194 self.data.evalue = float(m.group(2)) 195 else: 196 self.data.sw_score = 0 197 self.data.evalue = -1.0198200 m = self.__class__._re_start.search(line) 201 if m: 202 self.data.query_start = int(m.group(1)) 203 m = self.__class__._re_align.match(line) 204 assert m!=None, "invalid match" 205 self.data.query_aln = self.data.query_aln + m.group(1)206208 m = self.__class__._re_positive_alignment.match(line) 209 assert m!=None, "invalid match" 210 self.data.positives = self.data.positives + m.group(1)211221 """Parses compass results into a Record object. 222 """ 226 227235229 if isinstance(handle, File.UndoHandle): 230 uhandle = handle 231 else: 232 uhandle = File.UndoHandle(handle) 233 self._scanner.feed(uhandle, self._consumer) 234 return self._consumer.data237 """Iterate through a file of compass results""" 241260243 lines = [] 244 while 1: 245 line = self._uhandle.readline() 246 if not line: 247 break 248 if line[0:4] == "Ali1" and lines: 249 self._uhandle.saveline(line) 250 break 251 252 lines.append(line) 253 254 255 if not lines: 256 return None 257 258 data = ''.join(lines) 259 return self._parser.parse(File.StringHandle(data))
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Tue Sep 22 19:56:00 2009 | http://epydoc.sourceforge.net |