1
2
3
4
5
6 """
7 This module provides code to work with files from Rebase.
8 http://rebase.neb.com/rebase/rebase.html
9
10
11 Classes:
12 Record Holds rebase sequence data.
13 Iterator Iterates over sequence data in a rebase file.
14 Dictionary Accesses a rebase file using a dictionary interface.
15 RecordParser Parses rebase sequence data into a Record object.
16
17 _Scanner Scans a rebase-format stream.
18 _RecordConsumer Consumes rebase data to a Record object.
19
20
21 Functions:
22 index_file Index a Rebase file for a Dictionary.
23
24 """
25
import warnings

# Emitted once at import time: this module is deprecated because it no
# longer copes with the HTML that Rebase currently serves.
warnings.warn(
    "Bio.Rebase was deprecated, as it does not seem to be able to parse "
    "recent HTML files from Rebase. If you want to continue to use this "
    "module, please get in contact with the Biopython developers at "
    "biopython-dev@biopython.org to avoid permanent removal of this module "
    "from Biopython",
    DeprecationWarning,
)
28
29
30 from types import *
31 import string
32 from Bio import File
33 from Bio import Index
34 from Bio.ParserSupport import *
35
37 """Holds information from a Rebase record.
38
39 Members:
40 seq_5_to_3 The sequence.
41 seq_3_to_5
42 enzyme_num The enzyme number
43 pos Position of cleavage
44 prototype Prototype
45 source
46 microorganism
47 temperature Growth temperature
48 misc Miscellaneous information
49 date_entered
50 date_modified
51 num_Adeno2
52 num_Lambda
53 num_pBR322
54 num_PhiX174
55 num_SV40
56
57 """
59 """__init__(self, colwidth=60)
60
61 Create a new Record. colwidth specifies the number of residues
62 to put on each line.
63
64 """
65 self.seq_5_to_3 = ''
66 self.seq_3_to_5 = ''
67 self.methylation = ''
68 self.enzyme_num = None
69 self.prototype = ''
70 self.source = ''
71 self.microorganism = ''
72 self.temperature = None
73 self.misc = ''
74 self.date_entered = ''
75 self.date_modified = ''
76 self._colwidth = colwidth
77 self.num_Adeno2 = 0
78 self.num_Lambda = 0
79 self.num_pBR322 = 0
80 self.num_PhiX174 = 0
81 self.num_SV40 = 0
82
84 """Returns one record at a time from a Rebase file.
85
86 Methods:
87 next Return the next record from the stream, or None.
88
89 """
def __init__(self, handle, parser=None):
    """__init__(self, handle, parser=None)

    Create a new iterator.  handle is a file-like object.  parser
    is an optional Parser object to change the results into another form.
    If set to None, then the raw contents of the file will be returned.

    Raises ValueError if handle is neither a built-in file object nor an
    instance of an old-style class (the two kinds named by FileType and
    InstanceType from the types module).

    """
    # isinstance also admits subclasses of the built-in file type, which the
    # previous exact type() comparison rejected.
    if not isinstance(handle, (FileType, InstanceType)):
        raise ValueError("I expected a file handle or file-like object")
    # SGMLHandle is reached through File, the same spelling feed() uses
    # elsewhere in this module, instead of relying on a star import to
    # supply the bare name.
    self._uhandle = File.SGMLHandle(File.UndoHandle(handle))
    self._parser = parser
102
104 """next(self) -> object
105
106 Return the next rebase record from the file. If no more records,
107 return None.
108
109 """
110 lines = []
111 first_tag = 'Recognition Sequence'
112 while 1:
113 line = self._uhandle.readline()
114 if not line:
115 break
116 if line[:len( first_tag )] == 'first_tag':
117 self._uhandle.saveline(line)
118 break
119
120 if not line:
121 return None
122
123 if self._parser is not None:
124 return self._parser.parse(File.StringHandle(data))
125 return data
126
128 return iter(self.next, None)
129
131 """Accesses a rebase file using a dictionary interface.
132
133 """
134 __filename_key = '__filename'
135
def __init__(self, indexname, parser=None):
    """__init__(self, indexname, parser=None)

    Open a Rebase dictionary.  indexname is the name of an index that
    was previously built with the index_file function.  parser is an
    optional Parser object used to convert the results into another
    form; when it is None the raw contents of the file are returned.

    """
    self._index = Index.Index(indexname)
    # The index stores the path of the indexed data file under a
    # reserved key; that file is opened for reading here.
    data_path = self._index[Dictionary.__filename_key]
    self._handle = open(data_path)
    self._parser = parser
149
151 return len(self._index)
152
160
162 return getattr(self._index, name)
163
165 """Parses Rebase sequence data into a Record object.
166
167 """
171
def parse(self, handle):
    """parse(self, handle) -> object

    Feed handle through this parser's scanner, letting its consumer
    collect the events, and return the consumer's data attribute.

    """
    consumer = self._consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
175
177 """Scans a rebase file.
178
179 Methods:
180 feed Feed in one rebase record.
181
182 """
def feed(self, handle, consumer):
    """feed(self, handle, consumer)

    Scan rebase data.  handle is a file-like object holding the data and
    consumer is a Consumer object that receives events as the data is
    scanned.  Nothing is scanned when the handle has no line to peek at.

    """
    if isinstance(handle, File.UndoHandle):
        # Already an UndoHandle: use it as given.
        stream = handle
    else:
        # Anything else is wrapped in an UndoHandle and then an SGMLHandle.
        stream = File.SGMLHandle(File.UndoHandle(handle))

    if stream.peekline():
        self._scan_record(stream, consumer)
199
201 line = safe_readline( uhandle )
202 line = string.join( string.split( line ), ' ' ) + ' '
203 return line
204
205 - def _text_in( self, uhandle, text, count ):
206 for j in range( count ):
207 line = self._scan_line( uhandle )
208 text = text + line
209 return text
210
212 consumer.start_sequence()
213 text = ''
214 text = self._text_in( uhandle, text, 100 )
215 self._scan_sequence( text, consumer)
216 self._scan_methylation( text, consumer)
217 self._scan_enzyme_num( text, consumer )
218 self._scan_prototype( text, consumer )
219 self._scan_source( text, consumer )
220 self._scan_microorganism( text, consumer )
221 self._scan_temperature( text, consumer)
222 self._scan_date_entered( text, consumer)
223 self._scan_date_modified( text, consumer)
224 self._scan_Adeno2( text, consumer)
225 self._scan_Lambda( text, consumer)
226 self._scan_pBR322( text, consumer)
227 self._scan_PhiX174( text, consumer)
228 self._scan_SV40( text, consumer)
229
230
231
239
246
252
258
264
265
271
277
278
284
291
297
303
309
315
321
322
324 """Consumer that converts a rebase record to a Record object.
325
326 Members:
327 data Record with rebase data.
328
329 """
332
335
338
350
354
358
362
366
370
375
377 cols = string.split( line, ':' )
378 cols = string.split( cols[ 1 ] )
379 self.data.date_entered = string.join( cols[ :3 ] )
380
382 cols = string.split( line, ':' )
383 cols = string.split( cols[ 1 ] )
384 self.data.date_modified = string.join( cols[ :3 ] )
385
389
393
397
401
403 cols = string.split( line, ':' )
404 cols = string.split( cols[ 1 ], ' ' )
405 self.data.num_SV40 = cols[ 1 ]
406
def index_file(filename, indexname, rec2key=None):
    """index_file(filename, indexname, rec2key=None)

    Index a rebase file.  filename is the name of the file.
    indexname is the name of the dictionary.  rec2key is an
    optional callback that takes a Record and generates a unique key
    (e.g. the accession number) for the record.  If not specified,
    the record's title attribute will be used.

    Each key is stored in the index with a (start, length) tuple taken
    from the underlying handle's tell() before and after the record is read.

    Raises ValueError if filename does not exist, and KeyError if a
    record yields an empty key or a key that is already in the index.

    """
    # Imported here because the module header does not import os.
    import os

    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    # Written from outside the class, so the name-mangled form of the
    # Dictionary's private filename key has to be spelled out.
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    try:
        records = Iterator(handle, parser=RecordParser())
        while True:
            start = records._uhandle.tell()
            rec = records.next()
            length = records._uhandle.tell() - start

            if rec is None:
                break
            if rec2key is not None:
                key = rec2key(rec)
            else:
                # NOTE(review): the Record initializer visible in this module
                # does not set a title attribute -- confirm something else
                # assigns it, otherwise callers must supply rec2key.
                key = rec.title

            if not key:
                raise KeyError("empty sequence key was produced")
            elif index.has_key(key):
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        # Close the data file even when indexing stops on an error.
        handle.close()
442