1
2
3
4
5 """
6 Bio.SeqIO support module (not for general use).
7
8 Unless you are writing a new parser or writer for Bio.SeqIO, you should not
9 use this module. It provides base classes to try and simplify things.
10 """
11
12 from Bio.Alphabet import generic_alphabet
13
15 """Base class for building SeqRecord iterators.
16
17 You should write a next() method to return SeqRecord
18 objects. You may wish to redefine the __init__
19 method as well.
20 """
22 """Create a SequenceIterator object.
23
24 handle - input file
25 alphabet - optional, e.g. Bio.Alphabet.generic_protein
26
27 Note when subclassing:
28 - there should be a single non-optional argument,
29 the handle.
30 - you do not have to require an alphabet.
31 - you can add additional optional arguments."""
32 self.handle = handle
33 self.alphabet = alphabet
34
35
36
37
38
39
41 """Return the next record in the file.
42
43 This method should be replaced by any derived class to do something useful."""
44 raise NotImplementedError("This object should be subclassed")
45
46
47
48
49
50
52 """Iterate over the entries as SeqRecord objects.
53
54 Example usage for Fasta files:
55
56 myFile = open("example.fasta","r")
57 myFastaReader = FastaIterator(myFile)
58 for record in myFastaReader :
59 print record.id
60 print record.seq
61 myFile.close()"""
62 return iter(self.next, None)
63
65 """Base class for any iterator of a non-sequential file type.
66
67 This object is not intended for use directly.
68
69 When writing a parser for any interlaced sequence file where the whole
70 file must be read in order to extract any single record, then you should
71 subclass this object.
72
73 All you need to do is to define your own:
74 (1) __init__ method to parse the file and call self.move_start()
75 (2) __len__ method to return the number of records
76 (3) __getitem__ to return any requested record.
77
78 This class will then provide the iterator methods including next(), but relies
79 on knowing the total number of records and tracking the pending record index
80 as self._n.
81
82 It is up to the subclassed object to decide if it wants to generate a cache of
83 SeqRecords when initialised, or simply use its own lists and dicts and create
84 SeqRecords on request.
85 """
86
88 """Create the object.
89
90 This method should be replaced by any derived class to do something useful."""
91
92 self.move_start()
93 raise NotImplementedError("This object method should be subclassed")
94
95
96
97
99 """Return the number of records.
100
101 This method should be replaced by any derived class to do something useful."""
102 raise NotImplementedError("This object method should be subclassed")
103
104
105
106
108 """Return the requested record.
109
110 This method should be replaced by any derived class to do something
111 useful.
112
113 It should NOT touch the value of self._n"""
114 raise NotImplementedError("This object method should be subclassed")
115
116
117
118
121
123 next_record = self._n
124 if next_record < len(self) :
125 self._n = next_record+1
126 return self[next_record]
127 else :
128
129 return None
130
132 return iter(self.next, None)
133
135 """This class should be subclassed.
136
137 Interlaced file formats (e.g. Clustal) should subclass directly.
138
139 Sequential file formats (e.g. Fasta, GenBank) should subclass
140 the SequentialSequenceWriter class instead.
141 """
143 """Creates the writer object.
144
145 Use the method write_file() to actually record your sequence records."""
146 self.handle = handle
147
149 """Use this to catch errors like the sequence being None."""
150 try :
151
152
153
154
155 return record.seq.tostring()
156 except AttributeError :
157 if record.seq is None :
158
159
160 raise TypeError("SeqRecord (id=%s) has None for its sequence." \
161 % record.id)
162 else :
163 raise TypeError("SeqRecord (id=%s) has an invalid sequence." \
164 % record.id)
165
167 """Use this to avoid getting newlines in the output."""
168 return text.replace("\n", " ").replace("\r", " ").replace(" ", " ")
169
171 """Use this to write an entire file containing the given records.
172
173 records - A list or iterator returning SeqRecord objects
174
175 Should return the number of records (as an integer).
176
177 This method can only be called once."""
178
179
180 raise NotImplementedError("This object should be subclassed")
181
182
183
184
186 """This class should be subclassed.
187
188 It is intended for sequential file formats with an (optional)
189 header, repeated records, and an (optional) footer.
190
191 In this case (as with interlaced file formats), the user may
192 simply call the write_file() method and be done.
193
194 However, they may also call the write_header(), followed
195 by multiple calls to write_record() and/or write_records()
196 followed finally by write_footer().
197
198 Users must call write_header() and write_footer() even when
199 the file format concerned doesn't have a header or footer.
200 This is to try and make life as easy as possible when
201 switching the output format.
202
203 Note that write_header() cannot rely on any assumptions about
204 the number of records.
205 """
207 self.handle = handle
208 self._header_written = False
209 self._record_written = False
210 self._footer_written = False
211
213 assert not self._header_written, "You have aleady called write_header()"
214 assert not self._record_written, "You have aleady called write_record() or write_records()"
215 assert not self._footer_written, "You have aleady called write_footer()"
216 self._header_written = True
217
223
225 """Write a single record to the output file.
226
227 record - a SeqRecord object
228
229 Once you have called write_header() you can call write_record()
230 and/or write_records() as many times as needed. Then call
231 write_footer() and close()."""
232 assert self._header_written, "You must call write_header() first"
233 assert not self._footer_written, "You have already called write_footer()"
234 self._record_written = True
235 raise NotImplementedError("This object should be subclassed")
236
237
238
239
241 """Write multiple records to the output file.
242
243 records - A list or iterator returning SeqRecord objects
244
245 Once you have called write_header() you can call write_record()
246 and/or write_records() as many times as needed. Then call
247 write_footer() and close().
248
249 Returns the number of records written.
250 """
251
252 assert self._header_written, "You must call write_header() first"
253 assert not self._footer_written, "You have already called write_footer()"
254 count = 0
255 for record in records :
256 self.write_record(record)
257 count += 1
258
259 self._record_written = True
260 return count
261
263 """Use this to write an entire file containing the given records.
264
265 records - A list or iterator returning SeqRecord objects
266
267 This method can only be called once. Returns the number of records
268 written.
269 """
270 self.write_header()
271 count = self.write_records(records)
272 self.write_footer()
273 return count
274