Package Bio :: Package SeqIO :: Module InsdcIO
[hide private]
[frames | no frames]

Source Code for Module Bio.SeqIO.InsdcIO

  1  # Copyright 2007-2009 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package.
  6   
  7  """Bio.SeqIO support for the "genbank" and "embl" file formats. 
  8   
  9  You are expected to use this module via the Bio.SeqIO functions. 
 10  Note that internally this module calls Bio.GenBank to do the actual 
 11  parsing of both GenBank and EMBL files. 
 12   
 13  See also: 
 14   
 15  International Nucleotide Sequence Database Collaboration 
 16  http://www.insdc.org/ 
 17    
 18  GenBank 
 19  http://www.ncbi.nlm.nih.gov/Genbank/ 
 20   
 21  EMBL Nucleotide Sequence Database 
 22  http://www.ebi.ac.uk/embl/ 
 23   
 24  DDBJ (DNA Data Bank of Japan) 
 25  http://www.ddbj.nig.ac.jp/ 
 26  """ 
 27   
 28  from Bio.Seq import UnknownSeq 
 29  from Bio.GenBank.Scanner import GenBankScanner, EmblScanner 
 30  from Bio import Alphabet 
 31  from Interfaces import SequentialSequenceWriter 
 32  from Bio import SeqFeature 
 33   
 34  # NOTE 
 35  # ==== 
 36  # The "brains" for parsing GenBank and EMBL files (and any 
 37  # other flat file variants from the INSDC in future) is in 
 38  # Bio.GenBank.Scanner (plus the _FeatureConsumer in Bio.GenBank) 
 39  # However, all the writing code is in this file. 
 40   
 41   
def GenBankIterator(handle):
    """Iterate over a GenBank file, returning SeqRecord objects.

    Each entry, from its LOCUS line through to the closing // line, is
    turned into one SeqRecord with the associated annotation and features.

    Note that genome or chromosome files typically hold only one record.
    """
    # parse_records is a generator function, so its result is handed
    # straight back to the caller to iterate over.
    scanner = GenBankScanner(debug=0)
    return scanner.parse_records(handle)
52
def EmblIterator(handle):
    """Iterate over an EMBL file, returning SeqRecord objects.

    Each entry, up to and including its terminating // line, is turned
    into one SeqRecord with the associated annotation and features.

    Note that genome or chromosome files typically hold only one record.
    """
    # parse_records is a generator function, so its result is handed
    # straight back to the caller to iterate over.
    scanner = EmblScanner(debug=0)
    return scanner.parse_records(handle)
63
def GenBankCdsFeatureIterator(handle, alphabet=Alphabet.generic_protein):
    """Iterate over a GenBank file, returning a SeqRecord per CDS feature.

    A single entry (LOCUS line through to the closing //) can contain
    many CDS features.  Each one is returned as a SeqRecord holding the
    amino acid translation stated in the file (if one is given).

    The alphabet argument is passed on unchanged to the scanner's
    parse_cds_features method.
    """
    # parse_cds_features is a generator function:
    scanner = GenBankScanner(debug=0)
    return scanner.parse_cds_features(handle, alphabet)
73
def EmblCdsFeatureIterator(handle, alphabet=Alphabet.generic_protein):
    """Iterate over an EMBL file, returning a SeqRecord per CDS feature.

    A single entry (up to its terminating // line) can contain many CDS
    features.  Each one is returned as a SeqRecord holding the amino acid
    translation stated in the file (if one is given).

    The alphabet argument is passed on unchanged to the scanner's
    parse_cds_features method.
    """
    # parse_cds_features is a generator function:
    scanner = EmblScanner(debug=0)
    return scanner.parse_cds_features(handle, alphabet)
83
def _insdc_feature_position_string(pos, offset=0):
    """Build a GenBank/EMBL position string (PRIVATE).

    pos is a SeqFeature position object and offset is added to every
    coordinate written.  Use offset=1 for a start position, to convert
    it from python counting.

    Raises NotImplementedError for an AbstractPosition of a kind that is
    not handled here, and ValueError for any other object.
    """
    # The order of these checks is significant and mirrors the class
    # hierarchy: AbstractPosition is only tested once every specific
    # position type has been ruled out.
    if isinstance(pos, SeqFeature.ExactPosition):
        return "%i" % (pos.position + offset)

    if isinstance(pos, SeqFeature.WithinPosition):
        first = pos.position + offset
        last = pos.position + pos.extension + offset
        return "(%i.%i)" % (first, last)

    if isinstance(pos, SeqFeature.BetweenPosition):
        first = pos.position + offset
        last = pos.position + pos.extension + offset
        return "(%i^%i)" % (first, last)

    if isinstance(pos, SeqFeature.BeforePosition):
        return "<%i" % (pos.position + offset)

    if isinstance(pos, SeqFeature.AfterPosition):
        return ">%i" % (pos.position + offset)

    if isinstance(pos, SeqFeature.OneOfPosition):
        # Each alternative is rendered recursively with the same offset.
        choices = ",".join(_insdc_feature_position_string(choice, offset)
                           for choice in pos.position_choices)
        return "one-of(%s)" % choices

    if isinstance(pos, SeqFeature.AbstractPosition):
        raise NotImplementedError("Please report this as a bug in Biopython.")

    raise ValueError("Expected a SeqFeature position object.")
109 110
def _insdc_location_string_ignoring_strand_and_subfeatures(feature):
    """Build the location string for a feature's own span (PRIVATE).

    Only feature.ref and feature.location are used; the strand and any
    sub_features are deliberately ignored (the caller deals with those).
    A feature with a ref_db set is not supported (checked by assertion).
    """
    prefix = ""
    if feature.ref:
        prefix = "%s:" % feature.ref
    assert not feature.ref_db

    start = feature.location.start
    end = feature.location.end
    if start == end and isinstance(end, SeqFeature.ExactPosition):
        # Special case, 12^13 gets mapped to location 12:12
        # (a zero length slice, meaning the point between two letters)
        return "%s%i^%i" % (prefix, end.position, end.position + 1)

    # Typical case, e.g. 12..15 gets mapped to 11:15, so only the start
    # needs one adding to convert it from python counting.
    start_text = _insdc_feature_position_string(start, +1)
    end_text = _insdc_feature_position_string(end)
    return prefix + start_text + ".." + end_text
129
def _insdc_feature_location_string(feature):
    """Build a GenBank/EMBL location string from a SeqFeature (PRIVATE).

    A feature without sub_features gives a simple location, wrapped in
    complement(...) if it is on the reverse strand.  A feature with
    sub_features gives its location_operator applied to the locations of
    those sub_features, e.g. join(...).
    """
    # There is a choice of how to show joins on the reverse complement
    # strand, for example:
    #
    #     complement(join(1..10,20..100))
    # vs  join(complement(20..100),complement(1..10))
    #
    # Notice that the order of the entries gets flipped!
    #
    # GenBank and EMBL would both now use the first form, which is
    # shorter at least.  In that situation we expect the parent feature
    # and its children to all be marked as strand==-1, and the children
    # to be in the order 0:10 then 19:100.
    #
    # Also need to consider dual-strand examples like these from the
    # Arabidopsis thaliana chloroplast NC_000932:
    #     join(complement(69611..69724),139856..140650)
    # (gene ArthCp047, GeneID:844801) or its CDS, which is even better
    # due to a splice:
    #     join(complement(69611..69724),139856..140087,140625..140650)
    # (protein NP_051038.1 GI:7525057)

    children = feature.sub_features
    if not children:
        # Non-recursive case.
        location = _insdc_location_string_ignoring_strand_and_subfeatures(feature)
        if feature.strand == -1:
            return "complement(%s)" % location
        return location

    # As noted above, treat reverse complement strand features carefully:
    if feature.strand == -1:
        for child in children:
            assert child.strand == -1
        operator = feature.location_operator
        # The strand of each child is ignored here; one complement()
        # is wrapped round the whole thing instead.
        parts = ",".join(
            _insdc_location_string_ignoring_strand_and_subfeatures(child)
            for child in children)
        return "complement(%s(%s))" % (operator, parts)

    # This covers typical forward strand features, and also an evil mixed
    # strand feature (each child adds its own complement() if needed):
    assert feature.location_operator != ""
    operator = feature.location_operator
    parts = ",".join([_insdc_feature_location_string(child)
                      for child in children])
    return "%s(%s)" % (operator, parts)
174 175
class GenBankWriter(SequentialSequenceWriter):
    """Write SeqRecord objects to a handle as GenBank flat file records.

    Each record is written by write_record.  References are not written
    yet, and the date on the LOCUS line is a fixed placeholder (see the
    TODO comments below).
    """

    # Width of the keyword column at the start of each header line.
    HEADER_WIDTH = 12
    # Output lines are kept shorter than this many characters.
    MAX_WIDTH = 80
    # Number of characters before a feature location or qualifier.
    QUALIFIER_INDENT = 21

    def _write_single_line(self, tag, text):
        """Write one header line: tag padded to HEADER_WIDTH, then text.

        Used in the 'header' of each GenBank record.  Any new lines within
        text are replaced by spaces.  The tag must be shorter than
        HEADER_WIDTH and the text shorter than MAX_WIDTH - HEADER_WIDTH
        (both checked by assertion).
        """
        assert len(tag) < self.HEADER_WIDTH
        assert len(text) < self.MAX_WIDTH - self.HEADER_WIDTH, \
               "Annotation %s too long for %s line" % (repr(text), tag)
        self.handle.write("%s%s\n" % (tag.ljust(self.HEADER_WIDTH),
                                      text.replace("\n", " ")))

    def _write_multi_line(self, tag, text):
        """Write text under tag, wrapped at word breaks over several lines.

        Used in the 'header' of each GenBank record.  The tag is written
        on the first line only; continuation lines get a blank tag.  Every
        word must be shorter than MAX_WIDTH - HEADER_WIDTH (checked by
        assertion).
        """
        #TODO - Do the line spliting while preserving white space?
        max_len = self.MAX_WIDTH - self.HEADER_WIDTH
        assert len(tag) < self.HEADER_WIDTH
        text = text.strip()
        if len(text) < max_len:
            self._write_single_line(tag, text)
            return

        words = text.split()
        assert max([len(w) for w in words]) < max_len, \
               "Your description cannot be broken into nice lines!"
        while words:
            # Always take the first word.  It is known to fit on a line
            # by itself (see the assert above), and taking it up front
            # guarantees progress: a word of exactly max_len-1 letters
            # fails the packing test below even on an empty line, which
            # would otherwise leave this loop writing blank lines forever.
            text = " " + words.pop(0)
            # The leading space is included in len(text) here, so lines
            # wrap exactly where they always have done.
            while words and len(text) + 1 + len(words[0]) < max_len:
                text += " " + words.pop(0)
            text = text.strip()
            assert len(text) < max_len
            self._write_single_line(tag, text)
            # Only the first line carries the tag.
            tag = ""
        assert not words

    def _write_multi_entries(self, tag, text_list):
        """Write each string in text_list on a line of its own.

        Used for DBLINK and any similar later line types.  The tag is
        written on the first line only.  If the list of strings is empty,
        nothing is written.
        """
        for i, text in enumerate(text_list):
            if i == 0:
                self._write_single_line(tag, text)
            else:
                self._write_single_line("", text)

    def _write_the_first_line(self, record):
        """Write the LOCUS line.

        The locus name is taken from record.name, falling back on
        record.id and then the first "accession" annotation.

        Raises ValueError if the locus name is over 16 characters, if the
        record is too long for the length field, or if the alphabet is
        not DNA, RNA or protein.  Raises TypeError if the sequence has an
        invalid alphabet object.
        """
        locus = record.name
        if not locus or locus == "<unknown name>":
            locus = record.id
        if not locus or locus == "<unknown id>":
            locus = self._get_annotation_str(record, "accession",
                                             just_first=True)
        if len(locus) > 16:
            raise ValueError("Locus identifier %s is too long" % repr(locus))

        if len(record) > 99999999999:
            #Currently GenBank only officially support up to 350000, but
            #the length field can take eleven digits
            raise ValueError("Sequence too long!")

        #Get the base alphabet (underneath any Gapped or StopCodon encoding)
        a = Alphabet._get_base_alphabet(record.seq.alphabet)
        if not isinstance(a, Alphabet.Alphabet):
            raise TypeError("Invalid alphabet")
        elif isinstance(a, Alphabet.ProteinAlphabet):
            units = "aa"
        elif isinstance(a, Alphabet.NucleotideAlphabet):
            units = "bp"
        else:
            #Must be something like the generic Alphabet
            #(default for fasta files)
            raise ValueError("Need a Nucleotide or Protein alphabet")

        #Get the molecule type
        #TODO - record this explicitly in the parser?
        if isinstance(a, Alphabet.ProteinAlphabet):
            mol_type = ""
        elif isinstance(a, Alphabet.DNAAlphabet):
            mol_type = "DNA"
        elif isinstance(a, Alphabet.RNAAlphabet):
            mol_type = "RNA"
        else:
            #Must be something like NucleotideAlphabet or
            #just the generic Alphabet (default for fasta files)
            raise ValueError("Need a DNA, RNA or Protein alphabet")

        try:
            division = record.annotations["data_file_division"]
        except KeyError:
            division = "UNK"
        if division not in ["PRI", "ROD", "MAM", "VRT", "INV", "PLN", "BCT",
                            "VRL", "PHG", "SYN", "UNA", "EST", "PAT", "STS",
                            "GSS", "HTG", "HTC", "ENV", "CON"]:
            division = "UNK"

        assert len(units) == 2
        assert len(division) == 3
        #TODO - date
        #TODO - mol_type
        #The line is assembled field by field, rather than from one format
        #string full of spaces, so that each field lands on the fixed
        #column which the assertions below check (slices are 0-based):
        line = "%s%s %s %s %s%s %s %s %s\n" \
               % ("LOCUS".ljust(self.HEADER_WIDTH),  # line[0:12]
                  locus.ljust(16),                    # line[12:28]
                  str(len(record)).rjust(11),         # line[29:40]
                  units,                              # line[41:43]
                  " " * 3,                            # line[44:47] strand type
                  mol_type.ljust(7),                  # line[47:54]
                  " " * 8,                            # line[55:63] linear etc
                  division,                           # line[64:67]
                  "01-JAN-1980")                      # line[68:79]
        assert len(line) == 79+1, repr(line) #plus one for new line

        assert line[12:28].rstrip() == locus, \
               'LOCUS line does not contain the locus at the expected position:\n' + line
        assert line[28:29] == " "
        assert line[29:40].lstrip() == str(len(record)), \
               'LOCUS line does not contain the length at the expected position:\n' + line

        #Tests copied from Bio.GenBank.Scanner
        assert line[40:44] in [' bp ', ' aa '], \
               'LOCUS line does not contain size units at expected position:\n' + line
        #A blank strand type is three spaces (the slice is 3 characters).
        assert line[44:47] in [' ' * 3, 'ss-', 'ds-', 'ms-'], \
               'LOCUS line does not have valid strand type (Single stranded, ...):\n' + line
        assert line[47:54].strip() == "" \
               or line[47:54].strip().find('DNA') != -1 \
               or line[47:54].strip().find('RNA') != -1, \
               'LOCUS line does not contain valid sequence type (DNA, RNA, ...):\n' + line
        assert line[54:55] == ' ', \
               'LOCUS line does not contain space at position 55:\n' + line
        assert line[55:63].strip() in ['', 'linear', 'circular'], \
               'LOCUS line does not contain valid entry (linear, circular, ...):\n' + line
        assert line[63:64] == ' ', \
               'LOCUS line does not contain space at position 64:\n' + line
        assert line[67:68] == ' ', \
               'LOCUS line does not contain space at position 68:\n' + line
        assert line[70:71] == '-', \
               'LOCUS line does not contain - at position 71 in date:\n' + line
        assert line[74:75] == '-', \
               'LOCUS line does not contain - at position 75 in date:\n' + line

        self.handle.write(line)

    def _get_annotation_str(self, record, key, default=".", just_first=False):
        """Get an annotation dictionary entry (as a string).

        If the key is missing from record.annotations, the default is
        returned as it is.

        Some entries are lists, in which case if just_first=True the first
        entry is returned.  If just_first=False (default) this verifies
        there is only one entry before returning it.
        """
        try:
            answer = record.annotations[key]
        except KeyError:
            return default
        if isinstance(answer, list):
            if not just_first:
                assert len(answer) == 1
            return str(answer[0])
        else:
            return str(answer)

    def _write_sequence(self, record):
        """Write the sequence in numbered lines of six blocks of ten letters.

        Nothing is written for an UnknownSeq.
        """
        #Loosely based on code from Howard Salis
        #TODO - Force lower case?
        LETTERS_PER_LINE = 60
        SEQUENCE_INDENT = 9

        if isinstance(record.seq, UnknownSeq):
            #We have already recorded the length, and there is no need
            #to record a long sequence of NNNNNNN...NNN or whatever.
            return

        data = self._get_seq_string(record) #Catches sequence being None
        seq_len = len(data)
        for line_start in range(0, seq_len, LETTERS_PER_LINE):
            #Each line begins with the one-based position of its first letter
            self.handle.write(str(line_start+1).rjust(SEQUENCE_INDENT))
            line_end = min(line_start+LETTERS_PER_LINE, seq_len)
            for block_start in range(line_start, line_end, 10):
                self.handle.write(" %s" % data[block_start:block_start+10])
            self.handle.write("\n")

    def write_record(self, record):
        """Write a single record to the output file.

        The sections are written in this order: LOCUS, DEFINITION,
        ACCESSION, VERSION, DBLINK (one line per entry in record.dbxrefs),
        KEYWORDS, SEGMENT (only if annotated), SOURCE, ORGANISM and its
        taxonomy, FEATURES, ORIGIN with the sequence, and finally the
        closing // line.  Missing annotation is written as a single dot.
        """
        handle = self.handle
        self._write_the_first_line(record)

        accession = self._get_annotation_str(record, "accession",
                                             record.id.split(".", 1)[0],
                                             just_first=True)
        acc_with_version = accession
        if record.id.startswith(accession+"."):
            #Only use the version suffix of the id if it is an integer
            try:
                acc_with_version = "%s.%i" \
                                   % (accession, int(record.id.split(".", 1)[1]))
            except ValueError:
                pass
        gi = self._get_annotation_str(record, "gi", just_first=True)

        descr = record.description
        if descr == "<unknown description>":
            descr = "."
        self._write_multi_line("DEFINITION", descr)

        self._write_single_line("ACCESSION", accession)
        if gi != ".":
            self._write_single_line("VERSION", "%s GI:%s" % (acc_with_version, gi))
        else:
            self._write_single_line("VERSION", "%s" % (acc_with_version))

        #The NCBI only expect two types of link so far,
        #e.g. "Project:28471" and "Trace Assembly Archive:123456"
        #TODO - Filter the dbxrefs list to just these?
        self._write_multi_entries("DBLINK", record.dbxrefs)

        try:
            #List of strings
            keywords = "; ".join(record.annotations["keywords"])
        except KeyError:
            keywords = "."
        self._write_multi_line("KEYWORDS", keywords)

        if "segment" in record.annotations:
            #Deal with SEGMENT line found only in segmented records,
            #e.g. AH000819
            segment = record.annotations["segment"]
            if isinstance(segment, list):
                assert len(segment) == 1, segment
                segment = segment[0]
            self._write_single_line("SEGMENT", segment)

        self._write_multi_line("SOURCE", \
                               self._get_annotation_str(record, "source"))
        #The ORGANISM line MUST be a single line, as any continuation is the taxonomy
        org = self._get_annotation_str(record, "organism")
        #Use >= here, since _write_single_line insists the text is strictly
        #shorter than this width (so a name of exactly this length must
        #be truncated as well, rather than failing the assertion).
        if len(org) >= self.MAX_WIDTH - self.HEADER_WIDTH:
            org = org[:self.MAX_WIDTH - self.HEADER_WIDTH-4]+"..."
        #ORGANISM is a sub-keyword of SOURCE, indented by two spaces:
        self._write_single_line("  ORGANISM", org)
        try:
            #List of strings
            taxonomy = "; ".join(record.annotations["taxonomy"])
        except KeyError:
            taxonomy = "."
        self._write_multi_line("", taxonomy)

        #TODO - References...
        #The column heading lines up with the feature locations below it:
        handle.write("FEATURES".ljust(self.QUALIFIER_INDENT)
                     + "Location/Qualifiers\n")
        for feature in record.features:
            self._write_feature(feature)
        handle.write("ORIGIN\n")
        self._write_sequence(record)
        handle.write("//\n")

    def _write_feature_qualifier(self, key, value=None, quote=None):
        """Write one feature qualifier, wrapped over several lines if long.

        A false value (None, an empty string, zero) gives a bare /key
        entry.  Otherwise /key=value is written, with the value in double
        quotes if quote is true.  With quote=None (default) integers are
        left unquoted and everything else is quoted.
        """
        indent = " "*self.QUALIFIER_INDENT
        if not value:
            self.handle.write("%s/%s\n" % (indent, key))
            return
        #Quick hack with no line wrapping, may be useful for testing:
        #self.handle.write('%s/%s="%s"\n' % (indent, key, value))
        if quote is None:
            #Try to mimic unwritten rules about when quotes can be left out:
            quote = not isinstance(value, (int, long))
        if quote:
            line = '%s/%s="%s"' % (indent, key, value)
        else:
            line = '%s/%s=%s' % (indent, key, value)
        while line.lstrip():
            if len(line) < self.MAX_WIDTH:
                self.handle.write(line+"\n")
                return
            #Insert line break, at the last space possible...
            for index in range(min(len(line)-1, self.MAX_WIDTH),
                               self.QUALIFIER_INDENT+1, -1):
                if line[index] == " ":
                    break
            if line[index] != " ":
                #No nice place to break...
                index = self.MAX_WIDTH
            self.handle.write(line[:index] + "\n")
            line = indent + line[index:].lstrip()

    def _wrap_location(self, location):
        """Split a feature location into lines (break at commas).

        Continuation lines are indented by QUALIFIER_INDENT spaces.  If
        some part has no comma to break at, a warning is issued and the
        rest of the location is left on one (over long) line.
        """
        #Done with a loop rather than by recursion, so the number of
        #lines needed is not restricted by the recursion limit.
        length = self.MAX_WIDTH - self.QUALIFIER_INDENT
        pieces = []
        while len(location) > length:
            index = location[:length].rfind(",")
            if index == -1:
                #No good place to split (!)
                import warnings
                warnings.warn("Couldn't split location:\n%s" % location)
                break
            #Keep the comma at the end of the line it finishes
            pieces.append(location[:index+1])
            location = location[index+1:]
        pieces.append(location)
        return ("\n" + " "*self.QUALIFIER_INDENT).join(pieces)

    def _write_feature(self, feature):
        """Write a single SeqFeature object to features table.

        The feature type and location are written first, followed by one
        line (or more) for each qualifier value.
        """
        assert feature.type, feature
        location = _insdc_feature_location_string(feature)
        #The feature key goes after five spaces, and is then padded (or
        #cut short) so that the location starts after QUALIFIER_INDENT
        #characters, in line with the qualifiers underneath.
        key_column = "%s%s" % (" "*5, feature.type)
        key_column = key_column.ljust(self.QUALIFIER_INDENT)
        line = key_column[:self.QUALIFIER_INDENT] \
               + self._wrap_location(location) + "\n"
        self.handle.write(line)
        #Now the qualifiers...
        for key, values in feature.qualifiers.iteritems():
            if isinstance(values, list) or isinstance(values, tuple):
                for value in values:
                    self._write_feature_qualifier(key, value)
            elif values:
                #String, int, etc
                self._write_feature_qualifier(key, values)
            else:
                #e.g. a /pseudo entry
                self._write_feature_qualifier(key)
if __name__ == "__main__":
    # Quick self test: every example file is parsed, written back out in
    # GenBank format to a string, parsed again, and the two sets of
    # records are compared.  It expects to be run from this directory
    # (the example files are found by relative path).
    print("Quick self test")
    import os
    from StringIO import StringIO

    def compare_record(old, new):
        """Check two SeqRecords agree, raises a ValueError if mismatch.

        If both records have features, the result of comparing those is
        returned and the descriptions are not checked.
        """
        if old.id != new.id and old.name != new.name:
            raise ValueError("'%s' or '%s' vs '%s' or '%s' records" \
                             % (old.id, old.name, new.id, new.name))
        if len(old.seq) != len(new.seq):
            raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
        if str(old.seq).upper() != str(new.seq).upper():
            if len(old.seq) < 200:
                raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
            else:
                raise ValueError("'%s...' vs '%s...'" \
                                 % (old.seq[:100], new.seq[:100]))
        if old.features and new.features:
            return compare_features(old.features, new.features)
        #Just insist on at least one word in common:
        if (old.description or new.description) \
        and not set(old.description.split()).intersection(new.description.split()):
            raise ValueError("%s versus %s" \
                             % (repr(old.description), repr(new.description)))
        #TODO - check annotation
        return True

    def compare_records(old_list, new_list):
        """Check two lists of SeqRecords agree, raises a ValueError if mismatch."""
        if len(old_list) != len(new_list):
            raise ValueError("%i vs %i records" % (len(old_list), len(new_list)))
        for old, new in zip(old_list, new_list):
            if not compare_record(old, new):
                return False
        return True

    def compare_feature(old, new, ignore_sub_features=False):
        """Check two SeqFeatures agree, raises a ValueError if mismatch.

        Only qualifiers present in both features are compared, and the
        db_xref, protein_id, product and note qualifiers are skipped.
        """
        if old.type != new.type:
            raise ValueError("Type %s versus %s" % (old.type, new.type))
        if old.location.nofuzzy_start != new.location.nofuzzy_start \
        or old.location.nofuzzy_end != new.location.nofuzzy_end:
            raise ValueError("%s versus %s:\n%s\nvs:\n%s" \
                             % (old.location, new.location, str(old), str(new)))
        if old.strand != new.strand:
            raise ValueError("Different strand:\n%s\nvs:\n%s" % (str(old), str(new)))
        if old.location.start != new.location.start:
            raise ValueError("Start %s versus %s:\n%s\nvs:\n%s" \
                             % (old.location.start, new.location.start, str(old), str(new)))
        if old.location.end != new.location.end:
            raise ValueError("End %s versus %s:\n%s\nvs:\n%s" \
                             % (old.location.end, new.location.end, str(old), str(new)))
        if not ignore_sub_features:
            if len(old.sub_features) != len(new.sub_features):
                raise ValueError("Different sub features")
            for a, b in zip(old.sub_features, new.sub_features):
                if not compare_feature(a, b):
                    return False
        #This only checks key shared qualifiers
        #Would a white list be easier?
        #for key in ["name","gene","translation","codon_table","codon_start","locus_tag"] :
        for key in set(old.qualifiers.keys()).intersection(new.qualifiers.keys()):
            if key in ["db_xref", "protein_id", "product", "note"]:
                #EMBL and GenBank files use different references/notes/etc
                continue
            if old.qualifiers[key] != new.qualifiers[key]:
                raise ValueError("Qualifier mis-match for %s:\n%s\n%s" \
                                 % (key, old.qualifiers[key], new.qualifiers[key]))
        return True

    def compare_features(old_list, new_list, ignore_sub_features=False):
        """Check two lists of SeqFeatures agree, raises a ValueError if mismatch."""
        if len(old_list) != len(new_list):
            raise ValueError("%i vs %i features" % (len(old_list), len(new_list)))
        for old, new in zip(old_list, new_list):
            #This assumes they are in the same order
            if not compare_feature(old, new, ignore_sub_features):
                return False
        return True

    def check_genbank_writer(records):
        """Write the records as GenBank, parse that output, and compare."""
        handle = StringIO()
        GenBankWriter(handle).write_file(records)
        handle.seek(0)

        records2 = list(GenBankIterator(handle))
        assert compare_records(records, records2)

    def load_records(path, parse):
        """Return a list of the records parse(handle) gives for a file.

        The file is closed again even if the parsing fails.
        """
        handle = open(path)
        try:
            return list(parse(handle))
        finally:
            handle.close()

    for filename in os.listdir("../../Tests/GenBank"):
        if not filename.endswith(".gbk") and not filename.endswith(".gb"):
            continue
        print(filename)

        records = load_records("../../Tests/GenBank/%s" % filename,
                               GenBankIterator)

        check_genbank_writer(records)

    for filename in os.listdir("../../Tests/EMBL"):
        if not filename.endswith(".embl"):
            continue
        print(filename)

        records = load_records("../../Tests/EMBL/%s" % filename,
                               EmblIterator)

        check_genbank_writer(records)

    from Bio import SeqIO
    for filename in os.listdir("../../Tests/SwissProt"):
        if not filename.startswith("sp"):
            continue
        print(filename)

        records = load_records("../../Tests/SwissProt/%s" % filename,
                               lambda handle: SeqIO.parse(handle, "swiss"))

        check_genbank_writer(records)