1
2
3
4
5 """
6 Bio.SeqIO support module (not for general use).
7
8 Unless you are writing a new parser or writer for Bio.SeqIO, you should not
9 use this module. It provides base classes to try and simplify things.
10 """
11
12 from Bio.Alphabet import generic_alphabet
13
15 """Base class for building SeqRecord iterators.
16
17 You should write a next() method to return SeqRecord
18 objects. You may wish to redefine the __init__
19 method as well.
20 """
22 """Create a SequenceIterator object.
23
24 handle - input file
25 alphabet - optional, e.g. Bio.Alphabet.generic_protein
26
27 Note when subclassing:
28 - there should be a single non-optional argument,
29 the handle.
30 - you do not have to require an alphabet.
31 - you can add additional optional arguments."""
32 self.handle = handle
33 self.alphabet = alphabet
34
35
36
37
38
39
41 """Return the next record in the file.
42
43 This method should be replaced by any derived class to do something useful."""
44 raise NotImplementedError("This object should be subclassed")
45
46
47
48
49
50
52 """Iterate over the entries as SeqRecord objects.
53
54 Example usage for Fasta files:
55
56 myFile = open("example.fasta","r")
57 myFastaReader = FastaIterator(myFile)
58 for record in myFastaReader :
59 print record.id
60 print record.seq
61 myFile.close()"""
62 return iter(self.next, None)
63
65 """Base class for any iterator of a non-sequential file type.
66
67 This object is not intended for use directly.
68
69 When writing a parser for any interlaced sequence file where the whole
70 file must be read in order to extract any single record, then you should
71 subclass this object.
72
73 All you need to do is to define your own:
74 (1) __init__ method to parse the file and call self.move_start()
75 (2) __len__ method to return the number of records
76 (3) __getitem__ to return any requested record.
77
78 This class will then provide the iterator methods including next(), but relies
79 on knowing the total number of records and tracking the pending record index
80 as self._n.
81
82 It is up to the subclassed object to decide if it wants to generate a cache of
83 SeqRecords when initialised, or simply use its own lists and dicts and create
84 SeqRecords on request.
85 """
86
88 """Create the object.
89
90 This method should be replaced by any derived class to do something useful."""
91
92 self.move_start()
93 raise NotImplementedError("This object method should be subclassed")
94
95
96
97
99 """Return the number of records.
100
101 This method should be replaced by any derived class to do something useful."""
102 raise NotImplementedError("This object method should be subclassed")
103
104
105
106
108 """Return the requested record.
109
110 This method should be replaced by any derived class to do something
111 useful.
112
113 It should NOT touch the value of self._n"""
114 raise NotImplementedError("This object method should be subclassed")
115
116
117
118
121
123 next_record = self._n
124 if next_record < len(self) :
125 self._n = next_record+1
126 return self[next_record]
127 else :
128
129 return None
130
132 return iter(self.next, None)
133
135 """This class should be subclassed.
136
137 Interlaced file formats (e.g. Clustal) should subclass directly.
138
139 Sequential file formats (e.g. Fasta, GenBank) should subclass
140 the SequentialSequenceWriter class instead.
141 """
143 """Creates the writer object.
144
145 Use the method write_file() to actually record your sequence records."""
146 self.handle = handle
147
149 """Use this to catch errors like the sequence being None."""
150 try :
151
152
153
154
155 return record.seq.tostring()
156 except AttributeError :
157 if record.seq is None :
158
159
160 raise TypeError("SeqRecord (id=%s) has None for its sequence." \
161 % record.id)
162 else :
163 raise TypeError("SeqRecord (id=%s) has an invalid sequence." \
164 % record.id)
165
167 """Use this to avoid getting newlines in the output."""
168 answer = text
169 for x in ["\n", "\r"] :
170 answer = answer.replace(x, " ")
171 return answer.replace(" ", " ")
172
174 """Use this to write an entire file containing the given records.
175
176 records - A list or iterator returning SeqRecord objects
177
178 Should return the number of records (as an integer).
179
180 This method can only be called once."""
181
182 raise NotImplementedError("This object should be subclassed")
183
184
185
186
188 """This class should be subclassed.
189
190 It is intended for sequential file formats with an (optional)
191 header, repeated records, and an (optional) footer.
192
193 In this case (as with interlaced file formats), the user may
194 simply call the write_file() method and be done.
195
196 However, they may also call the write_header(), followed
197 by multiple calls to write_record() and/or write_records()
198 followed finally by write_footer().
199
200 Users must call write_header() and write_footer() even when
201 the file format concerned doesn't have a header or footer.
202 This is to try and make life as easy as possible when
203 switching the output format.
204
205 Note that write_header() cannot make any assumptions about
206 the number of records.
207 """
213
215 assert not self._header_written, "You have aleady called write_header()"
216 assert not self._record_written, "You have aleady called write_record() or write_records()"
217 assert not self._footer_written, "You have aleady called write_footer()"
218 self._header_written = True
219
225
227 """Write a single record to the output file.
228
229 record - a SeqRecord object
230
231 Once you have called write_header() you can call write_record()
232 and/or write_records() as many times as needed. Then call
233 write_footer() and close()."""
234 assert self._header_written, "You must call write_header() first"
235 assert not self._footer_written, "You have already called write_footer()"
236 self._record_written = True
237 raise NotImplementedError("This object should be subclassed")
238
239
240
241
243 """Write multiple records to the output file.
244
245 records - A list or iterator returning SeqRecord objects
246
247 Once you have called write_header() you can call write_record()
248 and/or write_records() as many times as needed. Then call
249 write_footer() and close().
250
251 Returns the number of records written.
252 """
253
254 assert self._header_written, "You must call write_header() first"
255 assert not self._footer_written, "You have already called write_footer()"
256 count = 0
257 for record in records :
258 self.write_record(record)
259 count += 1
260
261 self._record_written = True
262 return count
263
265 """Use this to write an entire file containing the given records.
266
267 records - A list or iterator returning SeqRecord objects
268
269 This method can only be called once. Returns the number of records
270 written.
271 """
272 self.write_header()
273 count = self.write_records(records)
274 self.write_footer()
275 return count
276