Package Bio :: Package Motif :: Package Parsers :: Module MEME
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif.Parsers.MEME

  1  # Copyright 2008 by Bartek Wilczynski 
  2  # Adapted from  Bio.MEME.Parser by Jason A. Hackney.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  from Bio.Alphabet import IUPAC 
  8  from Bio import File 
  9  from Bio.ParserSupport import * 
 10  from Bio import Seq 
 11  import re 
 12  from math import sqrt 
 13  import sys 
 14  from Bio.Motif import Motif 
 15   
16 -class MEMEMotif (Motif):
17 """A subclass of Motif used in parsing MEME (and MAST) output. 18 19 This sublcass defines functions and data specific to MEME motifs. 20 This includes the evalue for a motif and the PSSM of the motif. 21 22 Methods: 23 add_instance_from_values (name = 'default', pvalue = 1, sequence = 'ATA', start = 0, strand = +): create a new instance of the motif with the specified values. 24 add_to_pssm (position): add a new position to the pssm. The position should be a list of nucleotide/amino acid frequencies 25 add_to_logodds (position): add a new position to the log odds matrix. The position should be a tuple of log odds values for the nucleotide/amino acid at that position. 26 compare_motifs (other_motif): returns the maximum correlation between this motif and other_motif 27 """
28 - def __init__ (self):
29 Motif.__init__(self) 30 self.evalue = 0.0
31
32 - def _numoccurrences (self, number):
33 if type(number) == int: 34 self.num_occurrences = number 35 else: 36 number = int(number) 37 self.num_occurrences = number
38
39 - def get_instance_by_name (self,name):
40 for i in self.instances: 41 if i.sequence_name == name: 42 return i 43 return None
44
45 - def add_instance_from_values (self, name = 'default', pvalue = 1, sequence = 'ATA', start = 0, strand = '+'):
46 inst = MEMEInstance(sequence,self.alphabet) 47 inst._pvalue(pvalue) 48 inst._seqname(name) 49 inst._start(start) 50 inst._strand(strand) 51 if self.length: 52 inst._length(self.length) 53 if self.name: 54 inst._motifname(self.name) 55 self.add_instance(inst)
56
57 - def _evalue (self, evalue):
58 if type(evalue) == float: 59 self.evalue = evalue 60 else: 61 evalue = float(evalue) 62 self.evalue = evalue
63 64
65 -class MEMEInstance(Seq.Seq):
66 """A class describing the instances of a MEME motif, and the data thereof. 67 """
68 - def __init__ (self,*args,**kwds):
69 Seq.Seq.__init__(self,*args,**kwds) 70 self.sequence_name = "" 71 self.start = 0 72 self.pvalue = 1.0 73 self.strand = 0 74 self.length = 0 75 self.motif_name = ""
76 77
78 - def _seqname (self, name):
79 self.sequence_name = name
80
81 - def _motifname (self, name):
82 self.motif_name = name
83
84 - def _start (self,start):
85 start = int(start) 86 self.start = start
87
88 - def _pvalue (self,pval):
89 pval = float(pval) 90 self.pvalue = pval
91
92 - def _score (self, score):
93 score = float(score) 94 self.score = score
95
96 - def _strand (self, strand):
97 self.strand = strand
98
99 - def _length (self, length):
100 self.length = length
101 102
103 -class MEMERecord:
104 """A class for holding the results of a MEME run. 105 106 A MEMERecord is an object that holds the results from running 107 MEME. It implements no methods of its own. 108 109 """
110 - def __init__ (self):
111 """__init__ (self)""" 112 self.motifs = [] 113 self.version = "" 114 self.datafile = "" 115 self.command = "" 116 self.alphabet = None 117 self.sequence_names = []
118
119 - def get_motif_by_name (self, name):
120 for m in self.motifs: 121 if m.name == name: 122 return m
123
124 -class MEMEParser (AbstractParser):
125 """A parser for the text output of the MEME program. 126 Parses the output into an object of the MEMERecord class. 127 128 Methods: 129 parse (handle): parses the contents of the file handle passed to it. 130 131 Example: 132 133 >>>f = open("meme.output.txt") 134 >>>parser = MEMEParser() 135 >>>meme_record = parser.parse(f) 136 >>>for motif in meme_record.motifs: 137 ... for instance in motif.instances: 138 ... print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue 139 140 """
141 - def __init__ (self):
142 """__init__ (self)""" 143 self._scanner = _MEMEScanner() 144 self._consumer = _MEMEConsumer()
145
146 - def parse (self, handle):
147 """parse (self, handle)""" 148 self._scanner.feed(handle, self._consumer) 149 return self._consumer.data
150 151 152
153 -class _MEMEScanner:
154 """Scanner for MEME output. 155 156 Methods: 157 feed 158 159 """ 160
161 - def feed (self, handle, consumer):
162 """ 163 Feeds in MEME output for scanning. handle should 164 implement the readline method. consumer is 165 a Consumer object that can receive the salient events. 166 """ 167 if isinstance(handle, File.UndoHandle): 168 uhandle = handle 169 else: 170 uhandle = File.UndoHandle(handle) 171 172 self._scan_header(uhandle, consumer) 173 self._scan_motifs (uhandle, consumer)
174
175 - def _scan_header(self, uhandle, consumer):
176 try: 177 read_and_call_until(uhandle, consumer.noevent, contains = 'MEME version') 178 except ValueError: 179 raise ValueError("Improper input file. File should contain a line starting MEME version.") 180 read_and_call(uhandle, consumer._version, start = 'MEME version') 181 read_and_call_until(uhandle, consumer.noevent, start = 'TRAINING SET') 182 read_and_call(uhandle, consumer.noevent, start = 'TRAINING SET') 183 read_and_call(uhandle, consumer.noevent, start = '****') 184 read_and_call(uhandle, consumer._datafile, start = 'DATAFILE') 185 read_and_call(uhandle, consumer._alphabet, start = 'ALPHABET') 186 read_and_call(uhandle, consumer.noevent, start = 'Sequence name') 187 read_and_call(uhandle, consumer.noevent, start = '----') 188 read_and_call_until(uhandle, consumer._sequence_name, start = '***') 189 read_and_call_until(uhandle, consumer.noevent, start = 'command:') 190 read_and_call(uhandle, consumer._commandline, start = 'command:') 191 read_and_call_until(uhandle, consumer.noevent, start = 'MOTIF 1')
192
193 - def _scan_motifs(self, uhandle, consumer):
194 while 1: 195 read_and_call(uhandle, consumer._add_motif_with_info, start = 'MOTIF') 196 read_and_call_until(uhandle, consumer.noevent, contains = 'sorted by position p-value') 197 read_and_call(uhandle, consumer.motif_name, contains = 'sorted by position p-value') 198 read_and_call(uhandle, consumer.noevent, start = '---') 199 read_and_call(uhandle, consumer.noevent, start = 'Sequence name') 200 read_and_call(uhandle, consumer.noevent, start = '---') 201 read_and_call_until(uhandle, consumer.add_instance, start = '---') 202 read_and_call_until(uhandle, consumer.noevent, start = 'log-odds matrix') 203 read_and_call(uhandle, consumer.noevent) 204 read_and_call_until(uhandle, consumer.add_to_logodds, start = '---') 205 read_and_call_until(uhandle, consumer.noevent, start = 'letter-probability matrix') 206 read_and_call(uhandle, consumer.noevent, start = 'letter-probability matrix') 207 read_and_call_until(uhandle, consumer.add_to_pssm, start = '---') 208 read_and_call_until(uhandle, consumer.noevent, start = 'Time') 209 read_and_call(uhandle, consumer.noevent, start = 'Time') 210 read_and_call(uhandle, consumer.noevent, blank = 1) 211 read_and_call(uhandle, consumer.noevent, start = '***') 212 read_and_call_while(uhandle, consumer.noevent, blank = 1) 213 read_and_call(uhandle, consumer.noevent, start = '***') 214 line = safe_peekline(uhandle) 215 if line.startswith("SUMMARY OF MOTIFS"): 216 break
217 218 219
220 -class _MEMEConsumer:
221 """ 222 Consumer that can receive events from MEME Scanner. 223 224 This is the Consumer object that should be passed to the 225 MEME Scanner. 226 """ 227
228 - def __init__ (self):
229 self.current_motif = None 230 self.sequence_names = [] 231 self.data = MEMERecord()
232
233 - def _version (self, line):
234 line = line.strip() 235 ls = line.split() 236 self.data.version = ls[2]
237
238 - def _datafile (self, line):
239 line = line.strip() 240 line = line.replace('DATAFILE= ','') 241 self.data.datafile = line
242
243 - def _alphabet (self, line):
244 line = line.strip() 245 line = line.replace('ALPHABET= ','') 246 if line == 'ACGT': 247 al = IUPAC.unambiguous_dna 248 else: 249 al = IUPAC.protein 250 self.data.alphabet = al
251
252 - def _sequence_name (self, line):
253 line = line.strip() 254 ls = line.split() 255 self.data.sequence_names.append(ls[0]) 256 if len(ls) == 6: 257 self.data.sequence_names.append(ls[3])
258
259 - def _commandline (self, line):
260 line = line.strip() 261 line = line.replace('command: ','') 262 self.data.command = line
263
264 - def _add_motif_with_info (self, line):
265 line = line.strip() 266 ls = line.split() 267 motif = MEMEMotif() 268 #motif.length=ls[4] 269 motif._numoccurrences(ls[7]) 270 motif._evalue(ls[13]) 271 motif.alphabet=self.data.alphabet 272 self.data.motifs.append(motif) 273 self.current_motif = motif
274
275 - def motif_name (self, line):
276 line = line.strip() 277 ls = line.split() 278 name = " ".join(ls[0:2]) 279 self.current_motif.name=name
280
281 - def add_instance (self, line):
282 line = line.strip() 283 ls = line.split() 284 if self.data.command.find('revcomp') != -1: 285 #seq = Seq.Seq(ls[5], self.data.alphabet) 286 self.current_motif.add_instance_from_values(name = ls[0], sequence = ls[5], start = ls[2], pvalue = ls[3], strand = ls[1]) 287 else: 288 #seq = Seq.Seq(ls[4], self.data.alphabet) 289 self.current_motif.add_instance_from_values(name = ls[0], sequence = ls[4], start = ls[1], pvalue = ls[2])
290
291 - def add_to_pssm (self, line):
292 pass
293
294 - def add_to_logodds (self, line):
295 pass
296
297 - def noevent (self,line):
298 pass
299 300 301
302 -class _MASTConsumer:
303 """ 304 Consumer that can receive events from _MASTScanner. 305 306 A _MASTConsumer parses lines from a mast text output file. 307 The motif match diagrams are parsed using line buffering. 308 Each of the buffering functions have a dummy variable that is 309 required for testing using the Bio.ParserSupport.TaggingConsumer. 310 If this variable isn't there, the TaggingConsumer barfs. In 311 the _MASTScanner, None is passed in the place of this variable. 312 """
313 - def __init__ (self):
314 self.data = MASTRecord() 315 self._current_seq = "" 316 self._line_buffer = [] 317 self._buffer_size = 0 318 self._buffered_seq_start = 0
319
320 - def _version (self, line):
321 line = line.strip() 322 ls = line.split() 323 self.data._version(ls[2])
324
325 - def _database (self, line):
326 line = line.strip() 327 ls = line.split() 328 self.data._database(ls[1]) 329 al = "" 330 if ls[2] == '(nucleotide)': 331 al = IUPAC.unambiguous_dna 332 self.data._alphabet(al) 333 else: 334 al = IUPAC.protein 335 self.data._alphabet(al)
336
337 - def _add_motif (self, line):
338 line = line.strip() 339 ls = line.split() 340 m = MEMEMotif() 341 m.alphabet=self.data.alphabet 342 m.length=ls[1] 343 name = ls[0] 344 m.name=name 345 m.add_instance(ls[2]) 346 self.data._add_motif(m)
347
348 - def _add_match_diagram (self, line):
349 line = line.strip() 350 ls = line.split() 351 self.data._add_diagram_for_sequence(ls[1], self._current_seq) 352 ds = ls[1].split('_') 353 i = 0 354 start = 0 355 for i in range(0,len(ds)): 356 if ds[i].find('[') != -1 or ds[i].find('<') != -1: 357 inst = MEMEInstance() 358 inst._seqname (self._current_seq) 359 inst._start (start) 360 r = re.compile('\d+') 361 mn = r.findall(ds[i])[0] 362 if ds[i].find('-') != -1: 363 inst.strand = '-' 364 else: 365 inst.strand = '+' 366 motif = self.data.get_motif_by_name(mn) 367 motif.add_instance(inst) 368 start += motif.length 369 else: 370 start += int(ds[i])
371
372 - def _add_sequence_match_with_diagram (self, line):
373 line = line.strip() 374 ls = line.split() 375 self.data._add_sequence(ls[0]) 376 self.data._add_diagram_for_sequence(ls[2],ls[0]) 377 ds = ls[2].split('_') 378 i = 0 379 start = 0 380 for i in range(0,len(ds)): 381 if ds[i].find('+') != -1 or ds[i].find('-') != -1: 382 inst = MEMEInstance() 383 inst._seqname (ls[0]) 384 inst._start (start) 385 r = re.compile('\d+') 386 mn = r.findall(ds[i])[0] 387 if ds[i].find('-') != -1: 388 inst.strand = '-' 389 else: 390 inst.strand = '+' 391 motif = self.data.get_motif_by_name(mn) 392 motif.add_instance(inst) 393 start += motif.length 394 else: 395 start += int(ds[i])
396
397 - def _add_diagram_from_buffer (self, dummy):
398 line = "" 399 for l in self._line_buffer: 400 line += l.strip() 401 ls = line.split() 402 self.data._add_diagram_for_sequence(ls[1], self._current_seq) 403 ds = ls[1].split('_') 404 i = 0 405 start = 0 406 for i in range(0,len(ds)): 407 if ds[i].find('[') != -1 or ds[i].find('<') != -1: 408 inst = MEMEInstance() 409 inst._seqname (self._current_seq) 410 inst._start (start) 411 r = re.compile('\d+') 412 mn = r.findall(ds[i])[0] 413 if ds[i].find('-') != -1: 414 inst.strand = '-' 415 else: 416 inst.strand = '+' 417 motif = self.data.get_motif_by_name(mn) 418 motif.add_instance(inst) 419 start += motif.length 420 else: 421 start += int(ds[i])
422
423 - def _set_current_seq (self, line):
424 line = line.strip() 425 self._current_seq = line 426 if not self.data.sequences.count(line): 427 self.data.sequences.append(line)
428
429 - def _add_line_to_buffer (self, line):
430 line = line.strip() 431 if not line.startswith('*****'): 432 self._line_buffer.append(line) 433 else: 434 return -1
435
436 - def _parse_buffer (self, dummy):
437 """Parses the line buffer to get e-values for each instance of a motif. 438 This buffer parser is the most likely point of failure for the 439 MASTParser. 440 """ 441 insts = self.data.get_motif_matches_for_sequence(self._current_seq) 442 if len(insts) > 0: 443 444 fullSeq = self._line_buffer[self._buffer_size-1] 445 pvals = self._line_buffer[1].split() 446 p = 0 447 lpval = len(pvals) 448 while p < lpval: 449 if pvals[p].count('e') > 1: 450 #Break blocks up by e and parse into valid floats. This only 451 #works if there are no e-values greater than 1e-5. 452 pvs = [] 453 spe = pvals[p].split('e') 454 spe.reverse() 455 dotind = spe[1].find('.') 456 if dotind == -1: 457 thispval = spe[1][-1] + 'e' + spe[0] 458 else: 459 thispval = spe[1][dotind-1:] + 'e' + spe[0] 460 pvs.append(thispval) 461 for spi in range(2,len(spe)): 462 dotind = spe[spi].find('.') 463 prevdotind = spe[spi-1].find('.') 464 if dotind != -1: 465 if prevdotind == -1: 466 thispval = spe[spi][dotind-1:] + 'e' + spe[spi-1][:-1] 467 else: 468 thispval = spe[spi][dotind-1:] + 'e' + spe[spi-1][0:prevdotind-1] 469 else: 470 if prevdotind == -1: 471 thispval = spe[spi][-1] + 'e' + spe[spi-1][:-1] 472 else: 473 thispval = spe[spi][-1] + 'e' + spe[spi-1][0:prevdotind-1] 474 pvs.append(thispval) 475 pvs.reverse() 476 if p > 0: 477 pvals = pvals[0:p] + pvs + pvals[p+1:] 478 else: 479 pvals = pvs + pvals[p+1:] 480 lpval = len(pvals) 481 p += 1 482 i = 0 483 if len(pvals) != len(insts): 484 sys.stderr.write("Failure to parse p-values for " + self._current_seq + ": " + self._line_buffer[1] + " to: " + str(pvals) + "\n") 485 pvals = [] 486 # else: 487 # sys.stderr.write('These are just fine' + self._current_seq + ': ' + self._line_buffer[1] + " to: " + str(pvals) + "\n") 488 for i in range(0,len(insts)): 489 inst = insts[i] 490 start = inst.start - self._buffered_seq_start + 1 491 thisSeq = fullSeq[start:start+inst.length] 492 thisSeq = Seq.Seq(thisSeq, self.data.alphabet) 493 inst._sequence(thisSeq) 494 if pvals: 495 inst._pvalue(float(pvals[i]))
496
497 - def _blank_buffer (self, dummy):
498 self._line_buffer = [] 499 self._buffer_size = 0
500
501 - def _collapse_buffer(self, dummy):
502 if self._buffer_size == 0: 503 if len(self._line_buffer) > 0: 504 self._buffer_size = len(self._line_buffer) 505 ll = self._line_buffer[self._buffer_size-1].split() 506 self._line_buffer[self._buffer_size-1] = ll[1] 507 self._buffered_seq_start = int(ll[0]) 508 else: 509 i = 0 510 for i in range(self._buffer_size, len(self._line_buffer)-1): 511 self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size] + self._line_buffer[i].strip() 512 ll = self._line_buffer[len(self._line_buffer)-1].split() 513 if int(ll[0]) == self._buffered_seq_start + len(self._line_buffer[self._buffer_size-1]): 514 self._line_buffer[self._buffer_size-1] += ll[1] 515 else: 516 differ = int(ll[0]) - (self._buffered_seq_start + len(self._line_buffer[self._buffer_size-1])) 517 self._line_buffer[self._buffer_size-1] += "N"*differ 518 self._line_buffer[self._buffer_size-1] += ll[1] 519 self._line_buffer = self._line_buffer[0:self._buffer_size]
520
521 - def _add_motif_match (self, line):
522 line = line.strip() 523 if line.find('[') != -1 or line.find('<') != -1: 524 pass 525 elif line.find('e') != -1: 526 pass 527 elif line.find('+') != -1: 528 pass
529
530 - def noevent (self, line):
531 pass
532 533 534
535 -class MASTParser(AbstractParser):
536 """ 537 Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord 538 539 A MASTParser takes a file handle for a MAST text output file and 540 returns a MASTRecord, containing the hits between motifs and 541 sequences. The parser does some unusual line buffering to parse out 542 match diagrams. Really complex diagrams often lead to an error message 543 and p-values not being parsed for a given line. 544 545 Methods: 546 parse (handle): parses the data from the file handle passed to it. 547 548 Example: 549 550 >>>f = open("mast_file.txt") 551 >>>parser = MASTParser() 552 >>>mast_record = parser.parse(f) 553 >>>for motif in mast_record.motifs: 554 >>> for instance in motif.instances: 555 >>> print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue 556 """
557 - def __init__ (self):
558 self._consumer = _MASTConsumer() 559 self._scanner = _MASTScanner()
560
561 - def parse (self, handle):
562 self._scanner.feed(handle, self._consumer) 563 return self._consumer.data
564 565 566
567 -class _MASTScanner:
568 """ 569 Scanner for MAST text output. 570 571 """
572 - def feed (self, handle, consumer):
573 if isinstance(handle, File.UndoHandle): 574 uhandle = handle 575 else: 576 uhandle = File.UndoHandle(handle) 577 578 self._scan_header(uhandle, consumer) 579 self._scan_matches(uhandle, consumer) 580 self._scan_annotated_matches(uhandle, consumer)
581
582 - def _scan_header (self, uhandle, consumer):
583 try: 584 read_and_call_until(uhandle, consumer.noevent, contains = "MAST version") 585 except ValueError: 586 raise ValueError("Improper input file. Does not begin with a line with 'MAST version'") 587 read_and_call(uhandle, consumer._version, contains = 'MAST version') 588 read_and_call_until(uhandle, consumer.noevent, start = 'DATABASE AND MOTIFS') 589 read_and_call(uhandle, consumer.noevent, start = 'DATABASE') 590 read_and_call(uhandle, consumer.noevent, start = '****') 591 read_and_call(uhandle, consumer._database, contains = 'DATABASE') 592 read_and_call_until(uhandle, consumer.noevent, contains = 'MOTIF WIDTH') 593 read_and_call(uhandle, consumer.noevent, contains = 'MOTIF') 594 read_and_call(uhandle, consumer.noevent, contains = '----') 595 read_and_call_until(uhandle, consumer._add_motif, blank = 1) 596 read_and_call_until(uhandle, consumer.noevent, start = 'SECTION II:')
597
598 - def _scan_matches (self, uhandle, consumer):
599 read_and_call_until(uhandle, consumer.noevent, start = 'SEQUENCE NAME') 600 read_and_call(uhandle, consumer.noevent, start = 'SEQUENCE NAME') 601 read_and_call(uhandle, consumer.noevent, start = '---') 602 # read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1) 603 read_and_call_until(uhandle, consumer.noevent, blank = 1) 604 read_and_call(uhandle, consumer.noevent, blank = 1)
605
606 - def _scan_annotated_matches (self, uhandle, consumer):
607 read_and_call_until(uhandle, consumer.noevent, start = 'SECTION III:') 608 read_and_call(uhandle, consumer.noevent, start = 'SECTION III:') 609 read_and_call_until(uhandle, consumer.noevent, start = '****') 610 read_and_call(uhandle, consumer.noevent, start = '****') 611 read_and_call_until(uhandle, consumer.noevent, start = '*****') 612 read_and_call(uhandle, consumer.noevent) 613 read_and_call_while(uhandle, consumer.noevent, blank = 1) 614 readMatches = 1 615 while readMatches == 1: 616 if consumer._current_seq: 617 if consumer._buffer_size != 0: 618 consumer._parse_buffer(None) 619 consumer._blank_buffer(None) 620 read_and_call(uhandle, consumer._set_current_seq) 621 read_and_call_until(uhandle, consumer.noevent, start = ' DIAGRAM') 622 read_and_call_until(uhandle, consumer._add_line_to_buffer, blank = 1) 623 consumer._add_diagram_from_buffer(None) 624 consumer._blank_buffer(None) 625 read_and_call(uhandle, consumer.noevent, blank = 1) 626 while 1: 627 line = safe_peekline(uhandle) 628 if line.startswith('****'): 629 consumer._parse_buffer(None) 630 readMatches = 0 631 break 632 read_and_call_until(uhandle, consumer._add_line_to_buffer, blank = 1) 633 read_and_call(uhandle, consumer.noevent, blank = 1) 634 consumer._collapse_buffer(None) 635 if attempt_read_and_call(uhandle, consumer.noevent, blank = 1): 636 break 637 elif attempt_read_and_call(uhandle, consumer.noevent, start = '*****'): 638 consumer._parse_buffer(None) 639 consumer._blank_buffer(None) 640 readMatches = 0 641 break
642 643 644
645 -class MASTRecord:
646 """The class for holding the results from a MAST run. 647 648 A MASTRecord holds data about matches between motifs and sequences. 649 The motifs held by the MASTRecord are objects of the class MEMEMotif. 650 651 Methods: 652 get_motif_matches_for_sequence(sequence_name): returns all of the 653 motif matches within a given sequence. The matches are objects of 654 the class MEMEInstance 655 get_motif_matches (motif_name): returns all of the matches for a motif 656 in the sequences searched. The matches returned are of class 657 MEMEInstance 658 get_motif_by_name (motif_name): returns a MEMEMotif with the given 659 name. 660 """
661 - def __init__ (self):
662 self.sequences = [] 663 self.version = "" 664 self.matches = [] 665 self.database = "" 666 self.diagrams = {} 667 self.alphabet = None 668 self.motifs = []
669
670 - def _version (self, version):
671 self.version = version
672
673 - def _alphabet (self, alphabet):
674 if alphabet == IUPAC.protein or alphabet == IUPAC.ambiguous_dna or alphabet == IUPAC.unambiguous_dna: 675 self.alphabet = alphabet 676 else: 677 return -1
678
679 - def _database(self, database):
680 self.database = database
681
682 - def get_motif_matches_for_sequence (self, seq):
683 insts = [] 684 for m in self.motifs: 685 for i in m.instances: 686 if i.sequence_name == seq: 687 insts.append(i) 688 insts.sort(lambda x,y: cmp(x.start, y.start)) 689 return insts
690
691 - def get_motif_matches (self, motif):
692 m = self.get_motif_by_name (motif.name) 693 return m.instances
694
695 - def _add_diagram_for_sequence (self, diagram, seq):
696 self.diagrams[seq] = diagram
697
698 - def _add_match (self, match):
699 self.matches.append(match)
700
701 - def _add_sequence (self, sequence):
703
704 - def _add_motif (self, motif):
705 self.motifs.append(motif)
706
707 - def get_motif_by_name (self, name):
708 for m in self.motifs: 709 if m.name == name: 710 return m
711