1 """Utilities for working with FASTA-formatted sequences.
2
3 This module uses Martel-based parsing to speed up the parsing process.
4
5 Classes:
6 Record Holds FASTA sequence data.
7 Iterator Iterates over sequence data in a FASTA file.
8 Dictionary Accesses a FASTA file using a dictionary interface.
9 RecordParser Parses FASTA sequence data into a Record object.
10 SequenceParser Parses FASTA sequence data into a SeqRecord object.
11
12 Functions:
13 index_file Index a FASTA file for a Dictionary.
14 """
15 from Bio import Seq
16 from Bio import SeqRecord
17 from Bio import Alphabet
18
19
class Record:
    """Holds information from a FASTA record.

    Members:
    title       Title line ('>' character not included).
    sequence    The sequence.

    """
    # NOTE(review): the 'class' and 'def' header lines were absent from the
    # reviewed text; they are restored here from the module docstring and
    # from the signature quoted in the original __init__ docstring.
    # Confirm against the upstream file.

    def __init__(self, colwidth=60):
        """Create a new, empty Record.

        colwidth specifies the number of residues to put on each line
        when generating FASTA format.  It is stored as self._colwidth.

        """
        self.title = ''
        self.sequence = ''
        self._colwidth = colwidth
38
49
class Iterator:
    """Returns one record at a time from a FASTA file.

    A record is the title line (starting with '>') plus the lines that
    follow it up to the next title line.  Lines starting with '#' inside
    a record are dropped.  Each record is handed back either as text
    (lines joined with newlines, trailing whitespace stripped) or, when
    a parser was supplied, as the result of parser.parse_string(text).
    """
    # NOTE(review): the 'class Iterator', 'def __iter__' and 'def next'
    # header lines were absent from the reviewed text; the names are
    # restored from the module docstring and from the
    # 'iter(self.next, None)' call.  Confirm against the upstream file.

    def __init__(self, handle, parser=None, debug=0):
        """Initialize a new iterator.

        Arguments:
        o handle - Object with a readline() method that returns an empty
        string at end of file.
        o parser - Optional object with a parse_string(text) method.  If
        None, next() returns the raw record text.
        o debug - If true, print diagnostics while reading.
        """
        self.handle = handle
        self._parser = parser
        self._debug = debug

        # Skip any text before the first record (e.g. blank lines).
        while True:
            line = handle.readline()
            # readline() returns '' at end of file; stop there instead of
            # indexing into an empty string (IndexError on empty files or
            # files without any '>' line).
            if not line or line[0] == ">":
                break
            if debug:
                # Single-argument parenthesised form prints identically
                # under the Python 2 print statement and Python 3.
                print("Skipping: " + line)
        # Either the first title line, or '' when the file has no records.
        self._lookahead = line

    def __iter__(self):
        """Iterate over the records, stopping when next() returns None."""
        return iter(self.next, None)

    def next(self):
        """Return the next record in the file, or None at end of file."""
        line = self._lookahead
        if not line:
            return None
        # Invariant: a non-empty lookahead is always a title line.
        assert line[0] == ">", line
        lines = [line.rstrip()]
        line = self.handle.readline()
        while line:
            if line[0] == ">":
                break
            if line[0] == "#":
                if self._debug:
                    print("Ignoring comment line")
            else:
                lines.append(line.rstrip())
            line = self.handle.readline()
        # Remember the next title line (or '' at end of file) for the
        # following call.
        self._lookahead = line
        if self._debug:
            # The original referenced an undefined name 'title' here and
            # raised NameError whenever debug was enabled.
            title = lines[0][1:]
            print("Debug: '%s' and '%s'" % (title, "".join(lines[1:])))
        text = "\n".join(lines)
        if self._parser is None:
            return text
        return self._parser.parse_string(text)
93
95 """Parses FASTA sequence data into a Fasta.Record object.
96 """
99
110
111 - def parse(self, handle):
113
115 """Parses FASTA sequence data into a SeqRecord object.
116 """
119 """Initialize the parser with its alphabet and optional title2ids function.
120
121 Arguments:
122 o alphabet - The alphabet of the sequences to be parsed. If not
123 passed, this will be set as generic_alphabet.
124 o title2ids - A function that, when given the title line of a FASTA
125 record (without the beginning >), will return the id, name and
126 description (in that order) for the record. If this is not given,
127 then the entire title line will be used as the description.
128 """
129 self.alphabet = alphabet
130 self.title2ids = title2ids
131
151
152 - def parse(self, handle):
154