Package Bio :: Package expressions :: Module fasta
[hide private]
[frames | no frames]

Source Code for Module Bio.expressions.fasta

  1  # Read a FASTA description 
  2   
  3  import warnings 
  4  warnings.warn("Bio.expressions was deprecated, as it does not work with recent versions of mxTextTools. If you want to continue to use this module, please get in contact with the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module from Biopython", DeprecationWarning) 
  5   
  6   
  7   
  8  import operator 
  9  from Martel import * 
 10  from Bio import Std 
 11   
 12  ### Parse dbxrefs given the NCBI|descr|line as explained 
 13  ### in ftp://ncbi.nlm.nih.gov/blast/db/README and augmented 
 14  ### by experience 
 15   
def make_2id(s, dbname, primary_name, secondary_name):
    """Build the expression for an identifier made of a tag and two fields.

    The expression is the literal tag ``s``, a "|", a first field, another
    "|", and a last field.  Each field is wrapped in Std.dbxref_dbid and
    carries the attributes "dbname" and "type".  When primary_name is None
    the first field is not captured at all: the literal ``s + "||"`` is
    matched and only the last field is read.

    s -- literal tag that starts the identifier
    dbname -- value stored under the "dbname" attribute of every field
    primary_name -- "type" attribute of the first field, or None to
                    require an empty first field
    secondary_name -- "type" attribute of the last field; must not be None

    Returns the combined expression.
    """
    assert secondary_name is not None

    if primary_name is not None:
        # tag|<first>|<last>
        tag = Str(s + "|")
        first_field = Std.dbxref_dbid(
            UntilSep(sep="|"),
            {"dbname": dbname, "type": primary_name})
        bar = Str("|")
        # The last field uses "| " as its separator set (bar or blank).
        last_field = Std.dbxref_dbid(
            UntilSep(sep="| "),
            {"dbname": dbname, "type": secondary_name})
        return tag + first_field + bar + last_field

    # tag||<last> -- the first field is empty
    tag = Str(s + "||")
    last_field = Std.dbxref_dbid(
        UntilSep(sep="| "),
        {"dbname": dbname, "type": secondary_name})
    return tag + last_field
29
def make_1id(s, dbname, name):
    """Build the expression for an identifier made of a tag and one field.

    The expression is the literal ``s + "|"`` followed by a single field
    wrapped in Std.dbxref_dbid.  The field is read with
    UntilSep(sep="| ") and carries the attributes "dbname" and "type".

    s -- literal tag that starts the identifier
    dbname -- value stored under the "dbname" attribute
    name -- value stored under the "type" attribute

    Returns the combined expression.
    """
    tag = Str(s + "|")
    field = Std.dbxref_dbid(
        UntilSep(sep="| "),
        {"dbname": dbname, "type": name})
    return tag + field
# Table of the known identifier layouts.  The order is kept exactly as
# listed because the entries are later combined with "|" in this order.
ids = [
    # gene identifier                  gi|id   (this isn't in the README)
    make_1id("gi", "x-gi", "primary"),

    # GenBank                          gb|accession|locus
    #   e.g. gb|U37104|APU37104
    make_2id("gb", "gb", "primary", "secondary"),

    # EMBL Data Library                emb|accession|locus
    #   e.g. emb|F19596|HSPD04201
    make_2id("emb", "embl", "primary", "secondary"),

    # DDBJ, DNA Database of Japan      dbj|accession|locus
    make_2id("dbj", "ddbj", "primary", "secondary"),

    # NBRF PIR                         pir||entry
    make_2id("pir", "pir", None, "primary"),

    # Protein Research Foundation      prf||name
    make_2id("prf", "x-prf", None, "primary"),

    # SWISS-PROT                       sp|accession|entry name
    make_2id("sp", "sp", "primary", "secondary"),

    # Brookhaven Protein Data Bank     pdb|entry|chain
    make_2id("pdb", "x-pdb", "primary", "secondary"),  # XXX not correct

    # Patents                          pat|country|number
    make_2id("pat", "x-pat", "primary", "secondary"),  # XXX not correct

    # GenInfo Backbone Id              bbs|number
    make_1id("bbs", "x-bbs", "primary"),
]

# General database identifier          gnl|database|identifier
# Unlike the entries above, the database name is itself read from the
# input (Std.dbxref_dbname) rather than being fixed.
gnl_id = (Str("gnl|") +
          Std.dbxref_dbname(UntilSep(sep="| ")) +
          Str("|") +
          Std.dbxref_dbid(UntilSep(sep="| ")))
ids.append(gnl_id)

ids.extend([
    # NCBI Reference Sequence          ref|accession|locus
    make_2id("ref", "x-ref", "primary", "secondary"),

    # Local Sequence identifier        lcl|identifier
    make_1id("lcl", "local", "primary"),
])

# Combine every layout into one alternation and tag it as a dbxref.
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

# One identifier followed by any number of further "|"-joined identifiers.
# (A variant that started with Assert(Re("[^ \R]+\|")) was left disabled
# in the original source.)
ncbi_term = ncbi_word + Rep(Str("|") + ncbi_word)

# Anything else: text up to the first blank, recorded against the
# "local" database.
generic_term = Std.dbxref(
    Std.dbxref_dbid(UntilSep(sep=" "), {"dbname": "local"})
)

# Try the NCBI layouts first, then fall back to the generic form.
id_term = ncbi_term | generic_term

###########################################################

# Zero or more lines that start with "#".
comment_lines = Rep(Str("#") + ToEol())
# Title line: ">" followed by the description (an id term, then the rest
# of the line) and the end-of-line marker.
title = (
    Str(">")
    + Std.description_line(id_term + UntilEol())
    + AnyEol()
)

# A sequence line is any line that does not start with ">".
seqline = (
    AssertNot(Str(">"))
    + Std.sequence(UntilEol())
    + AnyEol()
)

# The last sequence line of a file may lack its end-of-line marker.
seqline_nonewline = (
    AssertNot(Str(">"))
    + Std.sequence(Word())
)

sequence = Std.sequence_block(
    Rep(seqline | seqline_nonewline)
)

# A record: optional "#" comment lines, the title line, the sequence
# lines, then any number of trailing end-of-line markers.
record = Std.record(
    comment_lines
    + title
    + sequence
    + Rep(AnyEol())
)

# Format that reads records one at a time while still allowing #-style
# comments in the FASTA file.  After the name and attributes, the
# arguments come in (expression, reader, reader arguments) triples --
# presumably header, record and footer, judging by the HeaderFooter name.
format = HeaderFooter(
    "dataset", {"format": "fasta"},
    comment_lines, RecordReader.Until, (">",),
    record, RecordReader.StartsWith, (">",),
    comment_lines, RecordReader.Everything, ()
)