1
2
3
4
5
6 from Bio.Alphabet import generic_alphabet
7
9 """Base class for building SeqRecord iterators.
10
11 You should write a next() method to return SeqRecord
12 objects. You may wish to redefine the __init__
13 method as well.
14 """
16 """Create a SequenceIterator object.
17
18 handle - input file
19 alphabet - optional, e.g. Bio.Alphabet.generic_protein
20
21 Note when subclassing:
22 - there should be a single non-optional argument,
23 the handle.
24 - you do not have to require an alphabet.
25 - you can add additional optional arguments."""
26 self.handle = handle
27 self.alphabet = alphabet
28
29
30
31
32
33
35 """Return the next record in the file.
36
37 This method should be replaced by any derived class to do something useful."""
38 raise NotImplementedError, "This object should be subclassed"
39
40
41
42
43
44
46 """Iterate over the entries as SeqRecord objects.
47
48 Example usage for Fasta files:
49
50 myFile = open("example.fasta","r")
51 myFastaReader = FastaIterator(myFile)
52 for record in myFastaReader :
53 print record.id
54 print record.seq
55 myFile.close()"""
56 return iter(self.next, None)
57
59 """Base class for any iterator of a non-sequential file type.
60
61 This object is not intended for use directly.
62
63 When writing a parser for any interlaced sequence file where the whole
64 file must be read in order to extract any single record, then you should
65 subclass this object.
66
67 All you need to do is to define your own:
68 (1) __init__ method to parse the file and call self.move_start()
69 (2) __len__ method to return the number of records
70 (3) __getitem__ to return any requested record.
71
72 This class will then provide the iterator methods including next(), but relies
73 on knowing the total number of records and tracking the pending record index
74 as self._n.
75
76 It is up to the subclassed object to decide if it wants to generate a cache of
77 SeqRecords when initialised, or simply use its own lists and dicts and create
78 SeqRecords on request.
79 """
80
82 """Create the object.
83
84 This method should be replaced by any derived class to do something useful."""
85
86 self.move_start()
87 raise NotImplementedError, "This object method should be subclassed"
88
89
90
91
93 """Return the number of records.
94
95 This method should be replaced by any derived class to do something useful."""
96 raise NotImplementedError, "This object method should be subclassed"
97
98
99
100
102 """Return the requested record.
103
104 This method should be replaced by any derived class to do something
105 useful.
106
107 It should NOT touch the value of self._n"""
108 raise NotImplementedError, "This object method should be subclassed"
109
110
111
112
115
117 next_record = self._n
118 if next_record < len(self) :
119 self._n = next_record+1
120 return self[next_record]
121 else :
122
123 return None
124
126 return iter(self.next, None)
127
129 """This class should be subclassed.
130
131 Interlaced file formats (e.g. Clustal) should subclass directly.
132
133 Sequential file formats (e.g. Fasta, GenBank) should subclass
134 the SequentialSequenceWriter class instead.
135 """
137 """Creates the writer object.
138
139 Use the method write_file() to actually record your sequence records."""
140 self.handle = handle
141
143 """Use this to avoid getting newlines in the output."""
144 answer = text
145 for x in ["\n", "\r"] :
146 answer = answer.replace(x, " ")
147 return answer.replace(" ", " ")
148
150 """Use this to write an entire file containing the given records.
151
152 records - A list or iterator returning SeqRecord objects
153
154 This method can only be called once."""
155
156 raise NotImplementedError, "This object should be subclassed"
157
158
159
160
162 """This class should be subclassed.
163
164 It is intended for sequential file formats with an (optional)
165 header, repeated records, and an (optional) footer.
166
167 In this case (as with interlaced file formats), the user may
168 simply call the write_file() method and be done.
169
170 However, they may also call the write_header(), followed
171 by multiple calls to write_record() and/or write_records()
172 followed finally by write_footer().
173
174 Users must call write_header() and write_footer() even when
175 the file format concerned doesn't have a header or footer.
176 This is to try and make life as easy as possible when
177 switching the output format.
178
179 Note that write_header() cannot make any assumptions about
180 the number of records.
181 """
187
189 assert not self._header_written, "You have aleady called write_header()"
190 assert not self._record_written, "You have aleady called write_record() or write_records()"
191 assert not self._footer_written, "You have aleady called write_footer()"
192 self._header_written = True
193
199
201 """Write a single record to the output file.
202
203 record - a SeqRecord object
204
205 Once you have called write_header() you can call write_record()
206 and/or write_records() as many times as needed. Then call
207 write_footer() and close()."""
208 assert self._header_written, "You must call write_header() first"
209 assert not self._footer_written, "You have already called write_footer()"
210 self._record_written = True
211 raise NotImplementedError, "This object should be subclassed"
212
213
214
215
217 """Write multiple records to the output file.
218
219 records - A list or iterator returning SeqRecord objects
220
221 Once you have called write_header() you can call write_record()
222 and/or write_records() as many times as needed. Then call
223 write_footer() and close()."""
224
225 assert self._header_written, "You must call write_header() first"
226 assert not self._footer_written, "You have already called write_footer()"
227 for record in records :
228 self.write_record(record)
229
230 self._record_written = True
231
233 """Use this to write an entire file containing the given records.
234
235 records - A list or iterator returning SeqRecord objects
236
237 This method can only be called once."""
238 self.write_header()
239 self.write_records(records)
240 self.write_footer()
241