Package Bio :: Module SeqRecord
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqRecord

  1  # Copyright 2000-2002 Andrew Dalke. 
  2  # Copyright 2002-2004 Brad Chapman. 
  3  # Copyright 2006-2009 by Peter Cock. 
  4  # All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license.  Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8  """Represent a Sequence Record, a sequence with annotation.""" 
  9  __docformat__ = "epytext en" #Simple markup to show doctests nicely 
 10   
 11  # NEEDS TO BE KEPT IN SYNC WITH THE REST OF BIOPYTHON AND BIOPERL 
 12  # In particular, the SeqRecord and BioSQL.BioSeq.DBSeqRecord classes 
 13  # need to be in sync (this is the BioSQL "Database SeqRecord", see 
 14  # also BioSQL.BioSeq.DBSeq which is the "Database Seq" class) 
 15   
try:
    _string_types = basestring  # Python 2: covers both str and unicode
except NameError:
    _string_types = str  # Python 3 has no basestring


class _RestrictedDict(dict):
    """Dict which only allows sequences of given length as values (PRIVATE).

    This simple subclass of the python dictionary is used in the SeqRecord
    object for holding per-letter-annotations.  This class is intended to
    prevent simple errors by only allowing python sequences (e.g. lists,
    strings and tuples) to be stored, and only if their length matches that
    expected (the length of the SeqRecord's seq object).  It cannot however
    prevent the entries being edited in situ (for example appending entries
    to a list).
    """

    def __init__(self, length):
        """Create an EMPTY restricted dictionary.

        Arguments:
         - length - Required length of every value stored (converted with
                    int).
        """
        dict.__init__(self)
        self._length = int(length)

    def __setitem__(self, key, value):
        """Store value under key, if it is a sequence of the right length.

        Raises TypeError if the value lacks __len__ or __getitem__, or if
        its length differs from the length given at construction.
        """
        if not hasattr(value, "__len__") or not hasattr(value, "__getitem__") \
        or len(value) != self._length:
            raise TypeError("We only allow python sequences (lists, tuples or "
                            "strings) of length %i." % self._length)
        dict.__setitem__(self, key, value)

    def update(self, *args, **kwargs):
        """Add entries, checking each one via __setitem__.

        Accepts the same arguments as dict.update (a mapping or an iterable
        of key/value pairs, plus keyword arguments).  The built in
        dict.update does not call an overridden __setitem__, so without
        this method the length restriction could be bypassed.

        Raises TypeError on the first unacceptable value; entries handled
        before that point will already have been stored.
        """
        for key, value in dict(*args, **kwargs).items():
            self[key] = value


class SeqRecord(object):
    """A SeqRecord object holds a sequence and information about it.

    Main attributes:
     - id          - Identifier such as a locus tag (string)
     - seq         - The sequence itself (Seq object)

    Additional attributes:
     - name        - Sequence name, e.g. gene name (string)
     - description - Additional text (string)
     - dbxrefs     - List of database cross references (list of strings)
     - features    - Any (sub)features defined (list of SeqFeature objects)
     - annotations - Further information about the whole sequence (dictionary)
                     Most entries are lists of strings.
     - letter_annotations - Per letter/symbol annotation (restricted
                     dictionary). This holds python sequences (lists, strings
                     or tuples) whose length matches that of the sequence.
                     A typical use would be to hold a list of integers
                     representing sequencing quality scores, or a string
                     representing the secondary structure.

    You will typically use Bio.SeqIO to read in sequences from files as
    SeqRecord objects.  However, you may want to create your own SeqRecord
    objects directly (see the __init__ method for further details):

    >>> from Bio.Seq import Seq
    >>> from Bio.SeqRecord import SeqRecord
    >>> from Bio.Alphabet import IUPAC
    >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
    ...                         IUPAC.protein),
    ...                    id="YP_025292.1", name="HokC",
    ...                    description="toxic membrane protein")
    >>> print record
    ID: YP_025292.1
    Name: HokC
    Description: toxic membrane protein
    Number of features: 0
    Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())

    If you want to save SeqRecord objects to a sequence file, use Bio.SeqIO
    for this.  For the special case where you want the SeqRecord turned into
    a string in a particular file format there is a format method which uses
    Bio.SeqIO internally:

    >>> print record.format("fasta")
    >YP_025292.1 toxic membrane protein
    MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
    <BLANKLINE>
    """

    def __init__(self, seq, id="<unknown id>", name="<unknown name>",
                 description="<unknown description>", dbxrefs=None,
                 features=None):
        """Create a SeqRecord.

        Arguments:
         - seq         - Sequence, required (Seq or Mutable object)
         - id          - Sequence identifier, recommended (string)
         - name        - Sequence name, optional (string)
         - description - Sequence description, optional (string)
         - dbxrefs     - Database cross references, optional (list of strings)
         - features    - Any (sub)features, optional (list of SeqFeature objects)

        Raises TypeError if an argument is of the wrong type, or if the
        length of a non-None seq cannot be determined.

        You will typically use Bio.SeqIO to read in sequences from files as
        SeqRecord objects.  However, you may want to create your own SeqRecord
        objects directly.

        Note that while an id is optional, we strongly recommend you supply a
        unique id string for each record.  This is especially important
        if you wish to write your sequences to a file.

        If you don't have the actual sequence, but you do know its length,
        then using the UnknownSeq object from Bio.Seq is appropriate.

        You can create a 'blank' SeqRecord object, and then populate the
        attributes later.  Note that currently the annotations and the
        letter_annotations dictionaries cannot be specified when creating
        the SeqRecord.
        """
        if id is not None and not isinstance(id, _string_types):
            #Lots of existing code uses id=None... this may be a bad idea.
            raise TypeError("id argument should be a string")
        if not isinstance(name, _string_types):
            raise TypeError("name argument should be a string")
        if not isinstance(description, _string_types):
            raise TypeError("description argument should be a string")
        if dbxrefs is not None and not isinstance(dbxrefs, list):
            raise TypeError("dbxrefs argument should be a list (of strings)")
        if features is not None and not isinstance(features, list):
            raise TypeError("features argument should be a list (of SeqFeature objects)")
        self._seq = seq
        self.id = id
        self.name = name
        self.description = description
        if dbxrefs is None:
            dbxrefs = []
        self.dbxrefs = dbxrefs
        # annotations about the whole sequence
        self.annotations = {}

        # annotations about each letter in the sequence
        if seq is None:
            #Should we allow this and use a normal unrestricted dict?
            self._per_letter_annotations = _RestrictedDict(length=0)
        else:
            try:
                self._per_letter_annotations = _RestrictedDict(length=len(seq))
            except (TypeError, AttributeError):
                #len() raises TypeError for objects with no length (old
                #style Python 2 instances raise AttributeError instead).
                raise TypeError("seq argument should be a Seq or MutableSeq")

        # annotations about parts of the sequence
        if features is None:
            features = []
        self.features = features

    def _new_letter_annotations(self):
        """Return an empty restricted dict sized for the sequence (PRIVATE).

        If the sequence has no usable length (e.g. it is None) the expected
        length is taken to be zero.
        """
        try:
            length = len(self.seq)
        except (TypeError, AttributeError):
            #e.g. seq is None - note len(None) raises a TypeError
            length = 0
        return _RestrictedDict(length=length)

    #TODO - Just make this a read only property?
    def _set_per_letter_annotations(self, value):
        """Replace the per-letter-annotations with a checked copy (PRIVATE).

        Raises TypeError if value is not a dictionary, or if any of its
        entries is not a python sequence of the same length as the record's
        sequence.  On failure the existing annotations are left untouched.
        """
        if not isinstance(value, dict):
            raise TypeError("The per-letter-annotations should be a "
                            "(restricted) dictionary.")
        #Turn this into a restricted-dictionary (and check the entries).
        #Build it completely before assigning, so that a rejected entry
        #does not wipe the annotations already held.
        checked = self._new_letter_annotations()
        checked.update(value)
        self._per_letter_annotations = checked

    letter_annotations = property( \
        fget=lambda self: self._per_letter_annotations,
        fset=_set_per_letter_annotations,
        doc="""Dictionary of per-letter-annotation for the sequence.

        For example, this can hold quality scores used in FASTQ or QUAL files.
        Consider this example using Bio.SeqIO to read in an example Solexa
        variant FASTQ file as a SeqRecord:

        >>> from Bio import SeqIO
        >>> handle = open("Quality/solexa.fastq", "rU")
        >>> record = SeqIO.read(handle, "fastq-solexa")
        >>> handle.close()
        >>> print record.id, record.seq
        slxa_0013_1_0001_24 ACAAAAATCACAAGCATTCTTATACACC
        >>> print record.letter_annotations.keys()
        ['solexa_quality']
        >>> print record.letter_annotations["solexa_quality"]
        [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -6, -1, -1, -4, -1, -4, -19, -10, -27, -18]

        The per-letter-annotations get sliced automatically if you slice the
        parent SeqRecord, for example taking the last ten bases:

        >>> sub_record = record[-10:]
        >>> print sub_record.id, sub_record.seq
        slxa_0013_1_0001_24 CTTATACACC
        >>> print sub_record.letter_annotations["solexa_quality"]
        [-6, -1, -1, -4, -1, -4, -19, -10, -27, -18]

        Any python sequence (i.e. list, tuple or string) can be recorded in
        the SeqRecord's letter_annotations dictionary as long as the length
        matches that of the SeqRecord's sequence.  e.g.

        >>> len(sub_record.letter_annotations)
        1
        >>> sub_record.letter_annotations["dummy"] = "abcdefghij"
        >>> len(sub_record.letter_annotations)
        2

        You can delete entries from the letter_annotations dictionary as usual:

        >>> del sub_record.letter_annotations["solexa_quality"]
        >>> sub_record.letter_annotations
        {'dummy': 'abcdefghij'}

        You can completely clear the dictionary easily as follows:

        >>> sub_record.letter_annotations = {}
        >>> sub_record.letter_annotations
        {}
        """)

    def _set_seq(self, value):
        """Replace the sequence and reset the letter annotations (PRIVATE).

        Raises ValueError if the record still holds per-letter-annotations,
        since they would no longer match the new sequence.
        """
        #TODO - Add a deprecation warning that the seq should be write only?
        if self._per_letter_annotations:
            #TODO - Make this a warning? Silently empty the dictionary?
            raise ValueError("You must empty the letter annotations first!")
        self._seq = value
        #New (empty) restricted dict expecting the new sequence's length
        self._per_letter_annotations = self._new_letter_annotations()

    seq = property(fget=lambda self: self._seq,
                   fset=_set_seq,
                   doc="The sequence itself, as a Seq or MutableSeq object.")

    def __getitem__(self, index):
        """Returns a sub-sequence or an individual letter.

        Slicing, e.g. my_record[5:10], returns a new SeqRecord for
        that sub-sequence with appropriate annotation preserved.  The
        name, id and description are kept.

        Any per-letter-annotations are sliced to match the requested
        sub-sequence.  Unless a stride is used, all those features
        which fall fully within the subsequence are included (with
        their locations adjusted accordingly).

        However, the annotations dictionary and the dbxrefs list are
        not used for the new SeqRecord, as in general they may not
        apply to the subsequence.  If you want to preserve them, you
        must explicitly copy them to the new SeqRecord yourself.

        Using an integer index, e.g. my_record[5] is shorthand for
        extracting that letter from the sequence, my_record.seq[5].

        Raises ValueError if the index is neither an integer nor a slice,
        if the sequence is None and a slice is requested, or if an
        explicitly negative slice bound is used on a zero length record.

        For example, consider this short protein and its secondary
        structure as encoded by the PDB (e.g. H for alpha helices),
        plus a simple feature for its histidine self phosphorylation
        site:

        >>> from Bio.Seq import Seq
        >>> from Bio.SeqRecord import SeqRecord
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> from Bio.Alphabet import IUPAC
        >>> rec = SeqRecord(Seq("MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLAT"
        ...                     "EMMSEQDGYLAESINKDIEECNAIIEQFIDYLR",
        ...                     IUPAC.protein),
        ...                 id="1JOY", name="EnvZ",
        ...                 description="Homodimeric domain of EnvZ from E. coli")
        >>> rec.letter_annotations["secondary_structure"] = \
            "  S  SSSSSSHHHHHTTTHHHHHHHHHHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHHHHHHTT  "
        >>> rec.features.append(SeqFeature(FeatureLocation(20,21),
        ...                     type = "Site"))

        Now let's have a quick look at the full record,

        >>> print rec
        ID: 1JOY
        Name: EnvZ
        Description: Homodimeric domain of EnvZ from E. coli
        Number of features: 1
        Per letter annotation for: secondary_structure
        Seq('MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEE...YLR', IUPACProtein())
        >>> print rec.letter_annotations["secondary_structure"]
          S  SSSSSSHHHHHTTTHHHHHHHHHHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHHHHHHTT  
        >>> print rec.features[0].location
        [20:21]

        Now let's take a sub sequence, here chosen as the first (fractured)
        alpha helix which includes the histidine phosphorylation site:

        >>> sub = rec[11:41]
        >>> print sub
        ID: 1JOY
        Name: EnvZ
        Description: Homodimeric domain of EnvZ from E. coli
        Number of features: 1
        Per letter annotation for: secondary_structure
        Seq('RTLLMAGVSHDLRTPLTRIRLATEMMSEQD', IUPACProtein())
        >>> print sub.letter_annotations["secondary_structure"]
        HHHHHTTTHHHHHHHHHHHHHHHHHHHHHH
        >>> print sub.features[0].location
        [9:10]

        You can also of course omit the start or end values, for
        example to get the first ten letters only:

        >>> print rec[:10]
        ID: 1JOY
        Name: EnvZ
        Description: Homodimeric domain of EnvZ from E. coli
        Number of features: 0
        Per letter annotation for: secondary_structure
        Seq('MAAGVKQLAD', IUPACProtein())

        Or for the last ten letters:

        >>> print rec[-10:]
        ID: 1JOY
        Name: EnvZ
        Description: Homodimeric domain of EnvZ from E. coli
        Number of features: 0
        Per letter annotation for: secondary_structure
        Seq('IIEQFIDYLR', IUPACProtein())

        If you omit both, then you get a copy of the original record (although
        lacking the annotations and dbxrefs):

        >>> print rec[:]
        ID: 1JOY
        Name: EnvZ
        Description: Homodimeric domain of EnvZ from E. coli
        Number of features: 1
        Per letter annotation for: secondary_structure
        Seq('MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEE...YLR', IUPACProtein())

        Finally, indexing with a simple integer is shorthand for pulling out
        that letter from the sequence directly:

        >>> rec[5]
        'K'
        >>> rec.seq[5]
        'K'
        """
        if isinstance(index, int):
            #NOTE - The sequence level annotation like the id, name, etc
            #do not really apply to a single character.  However, should
            #we try and expose any per-letter-annotation here?  If so how?
            return self.seq[index]
        elif isinstance(index, slice):
            if self.seq is None:
                raise ValueError("If the sequence is None, we cannot slice it.")
            parent_length = len(self)
            answer = self.__class__(self.seq[index],
                                    id=self.id,
                                    name=self.name,
                                    description=self.description)
            #TODO - The description may no longer apply.
            #It would be safer to change it to something
            #generic like "edited" or the default value.

            #Don't copy the annotation dict and dbxrefs list,
            #they may not apply to a subsequence.

            #TODO - Cope with strides by generating ambiguous locations?
            if index.step is None or index.step == 1:
                #Kept for backwards compatibility: explicitly negative
                #bounds on a zero length record have always been refused.
                if parent_length == 0 \
                and ((index.start is not None and index.start < 0)
                     or (index.stop is not None and index.stop < 0)):
                    raise ValueError("Cannot support negative indices "
                                     "without the sequence length")
                #Let python resolve omitted, negative and out of range
                #bounds exactly as it does for the sequence slice itself.
                start, stop, step = index.indices(parent_length)
                #Select relevant features, add them with shifted locations.
                #Locations are half open like python slices, so a feature
                #ending exactly at the slice end is still fully inside.
                for f in self.features:
                    if start <= f.location.start.position \
                    and f.location.end.position <= stop:
                        answer.features.append(f._shift(-start))

            #Slice all the values to match the sliced sequence
            #(this should also work with strides, even negative strides):
            for key, value in self.letter_annotations.items():
                answer._per_letter_annotations[key] = value[index]

            return answer
        raise ValueError("Invalid index")

    def __iter__(self):
        """Iterate over the letters in the sequence.

        For example, using Bio.SeqIO to read in a protein FASTA file:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read(open("Amino/loveliesbleeding.pro"),"fasta")
        >>> for amino in record :
        ...     print amino
        ...     if amino == "L" : break
        X
        A
        G
        L
        >>> print record.seq[3]
        L

        This is just a shortcut for iterating over the sequence directly:

        >>> for amino in record.seq :
        ...     print amino
        ...     if amino == "L" : break
        X
        A
        G
        L
        >>> print record.seq[3]
        L

        Note that this does not facilitate iteration together with any
        per-letter-annotation.  However, you can achieve that using the
        python zip function on the record (or its sequence) and the relevant
        per-letter-annotation:

        >>> from Bio import SeqIO
        >>> rec = SeqIO.read(open("Quality/solexa.fastq", "rU"),
        ...                  "fastq-solexa")
        >>> print rec.id, rec.seq
        slxa_0013_1_0001_24 ACAAAAATCACAAGCATTCTTATACACC
        >>> print rec.letter_annotations.keys()
        ['solexa_quality']
        >>> for nuc, qual in zip(rec,rec.letter_annotations["solexa_quality"]) :
        ...     if qual < -10 :
        ...         print nuc, qual
        C -19
        C -27
        C -18

        You may agree that using zip(rec.seq, ...) is more explicit than using
        zip(rec, ...) as shown above.
        """
        return iter(self.seq)

    def __str__(self):
        """A human readable summary of the record and its annotation (string).

        The python built in function str works by calling the object's __str__
        method.  e.g.

        >>> from Bio.Seq import Seq
        >>> from Bio.SeqRecord import SeqRecord
        >>> from Bio.Alphabet import IUPAC
        >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
        ...                         IUPAC.protein),
        ...                    id="YP_025292.1", name="HokC",
        ...                    description="toxic membrane protein, small")
        >>> print str(record)
        ID: YP_025292.1
        Name: HokC
        Description: toxic membrane protein, small
        Number of features: 0
        Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())

        In this example you don't actually need to call str explicitly, as the
        print command does this automatically:

        >>> print record
        ID: YP_025292.1
        Name: HokC
        Description: toxic membrane protein, small
        Number of features: 0
        Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())

        Note that long sequences are shown truncated.
        """
        lines = []
        #Empty (or None) fields are simply left out of the summary
        if self.id:
            lines.append("ID: %s" % self.id)
        if self.name:
            lines.append("Name: %s" % self.name)
        if self.description:
            lines.append("Description: %s" % self.description)
        if self.dbxrefs:
            lines.append("Database cross-references: " \
                         + ", ".join(self.dbxrefs))
        lines.append("Number of features: %i" % len(self.features))
        for a in self.annotations:
            lines.append("/%s=%s" % (a, str(self.annotations[a])))
        if self.letter_annotations:
            lines.append("Per letter annotation for: " \
                         + ", ".join(self.letter_annotations.keys()))
        #Don't want to include the entire sequence,
        #and showing the alphabet is useful:
        lines.append(repr(self.seq))
        return "\n".join(lines)

    def __repr__(self):
        """A concise summary of the record for debugging (string).

        The python built in function repr works by calling the object's
        __repr__ method.  e.g.

        >>> from Bio.Seq import Seq
        >>> from Bio.SeqRecord import SeqRecord
        >>> from Bio.Alphabet import generic_protein
        >>> rec = SeqRecord(Seq("MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKAT"
        ...                    +"GEMKEQTEWHRVVLFGKLAEVASEYLRKGSQVYIEGQLRTRKWTDQ"
        ...                    +"SGQDRYTTEVVVNVGGTMQMLGGRQGGGAPAGGNIGGGQPQGGWGQ"
        ...                    +"PQQPQGGNQFSGGAQSRPQQSAPAAPSNEPPMDFDDDIPF",
        ...                    generic_protein),
        ...                 id="NP_418483.1", name="b4059",
        ...                 description="ssDNA-binding protein",
        ...                 dbxrefs=["ASAP:13298", "GI:16131885", "GeneID:948570"])
        >>> print repr(rec)
        SeqRecord(seq=Seq('MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKATGEMKEQTE...IPF', ProteinAlphabet()), id='NP_418483.1', name='b4059', description='ssDNA-binding protein', dbxrefs=['ASAP:13298', 'GI:16131885', 'GeneID:948570'])

        At the python prompt you can also use this shorthand:

        >>> rec
        SeqRecord(seq=Seq('MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKATGEMKEQTE...IPF', ProteinAlphabet()), id='NP_418483.1', name='b4059', description='ssDNA-binding protein', dbxrefs=['ASAP:13298', 'GI:16131885', 'GeneID:948570'])

        Note that long sequences are shown truncated.
        """
        return self.__class__.__name__ \
         + "(seq=%s, id=%s, name=%s, description=%s, dbxrefs=%s)" \
         % tuple(map(repr, (self.seq, self.id, self.name,
                            self.description, self.dbxrefs)))

    def format(self, format):
        r"""Returns the record as a string in the specified file format.

        The format should be a lower case string supported as an output
        format by Bio.SeqIO, which is used to turn the SeqRecord into a
        string.  e.g.

        >>> from Bio.Seq import Seq
        >>> from Bio.SeqRecord import SeqRecord
        >>> from Bio.Alphabet import IUPAC
        >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
        ...                         IUPAC.protein),
        ...                    id="YP_025292.1", name="HokC",
        ...                    description="toxic membrane protein")
        >>> record.format("fasta")
        '>YP_025292.1 toxic membrane protein\nMKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF\n'
        >>> print record.format("fasta")
        >YP_025292.1 toxic membrane protein
        MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
        <BLANKLINE>

        The python print command automatically appends a new line, meaning
        in this example a blank line is shown.  If you look at the string
        representation you can see there is a trailing new line (shown as
        slash n) which is important when writing to a file or if
        concatenating multiple sequence strings together.

        Note that this method will NOT work on every possible file format
        supported by Bio.SeqIO (e.g. some are for multiple sequences only).
        """
        #See also the __format__ added for Python 2.6 / 3.0, PEP 3101
        #See also the Bio.Align.Generic.Alignment class and its format()
        return self.__format__(format)

    def __format__(self, format_spec):
        """Returns the record as a string in the specified file format.

        This method supports the python format() function added in
        Python 2.6/3.0.  The format_spec should be a lower case
        string supported by Bio.SeqIO as an output file format.
        An empty format_spec gives the same result as str().
        See also the SeqRecord's format() method.
        """
        if format_spec:
            try:
                from StringIO import StringIO  # Python 2
            except ImportError:
                from io import StringIO  # Python 3
            #Imported here rather than at module level, as in the original
            from Bio import SeqIO
            handle = StringIO()
            SeqIO.write([self], handle, format_spec)
            return handle.getvalue()
        else:
            #Follow python convention and default to using __str__
            return str(self)

    def __len__(self):
        """Returns the length of the sequence.

        For example, using Bio.SeqIO to read in a FASTA nucleotide file:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read(open("Nucleic/sweetpea.nu"),"fasta")
        >>> len(record)
        309
        >>> len(record.seq)
        309
        """
        return len(self.seq)

    def __nonzero__(self):
        """Returns True regardless of the length of the sequence.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a SeqRecord always evaluated as True.

        Note that in comparison, a Seq object will evaluate to False if it
        has a zero length sequence.

        WARNING: The SeqRecord may in future evaluate to False when its
        sequence is of zero length (in order to better match the Seq
        object behaviour)!
        """
        return True

    #Python 3 looks for __bool__ instead of __nonzero__; without this alias
    #truth testing would fall back on __len__ and an empty record would
    #evaluate as False.
    __bool__ = __nonzero__
609
def _test():
    """Run the Bio.SeqRecord module's doctests (PRIVATE).

    This will try and locate the unit tests directory, and run the doctests
    from there in order that the relative paths used in the examples work.
    If the directory ../Tests does not exist, nothing is run.
    """
    import doctest
    import os
    tests_dir = os.path.join("..", "Tests")
    if os.path.isdir(tests_dir):
        print("Running doctests...")
        cur_dir = os.path.abspath(os.curdir)
        os.chdir(tests_dir)
        try:
            doctest.testmod()
        finally:
            #Always return to the original directory, even if the
            #doctest run itself fails.
            os.chdir(cur_dir)
        print("Done")


if __name__ == "__main__":
    _test()