1
2
3
4
5
6 """
7 This module is OBSOLETE.
8 Most of the functionality in this module has moved to Bio.ExPASy.Prodoc;
9 please see
10
11 Bio.ExPASy.Prodoc.read To read a Prodoc file containing one entry.
12 Bio.ExPASy.Prodoc.parse Iterates over entries in a Prodoc file.
13 Bio.ExPASy.Prodoc.Record Holds Prodoc data.
14 Bio.ExPASy.Prodoc.Reference Holds data from a Prodoc reference.
15
16 The other functions and classes in Bio.Prosite.Prodoc (including
17 Bio.Prosite.Prodoc.index_file and Bio.Prosite.Prodoc.Dictionary) are
18 considered deprecated, and were not moved to Bio.ExPASy.Prodoc. If you use
19 this functionality, please contact the Biopython developers at
20 biopython-dev@biopython.org to avoid permanent removal of this module from
21 Biopython.
22
23
24
25
26 This module provides code to work with the prosite.doc file from
27 Prosite, available at http://www.expasy.ch/prosite/.
28
29 Tested with:
30 Release 15.0, July 1998
31 Release 16.0, July 1999
32 Release 20.22, 13 November 2007
33
34
35 Functions:
36 parse Iterates over entries in a Prodoc file.
37 index_file Index a Prodoc file for a Dictionary.
38 _extract_record Extract Prodoc data from a web page.
39
40
41 Classes:
42 Record Holds Prodoc data.
43 Reference Holds data from a Prodoc reference.
44 Dictionary Accesses a Prodoc file using a dictionary interface.
45 RecordParser Parses a Prodoc record into a Record object.
46
47 _Scanner Scans Prodoc-formatted data.
48 _RecordConsumer Consumes Prodoc data to a Record object.
49 Iterator Iterates over entries in a Prodoc file; DEPRECATED.
50 """
51
52 from types import *
53 import os
54 import sgmllib
55 from Bio import File
56 from Bio import Index
57 from Bio.ParserSupport import *
58
70
79
80
81
82
83
85 """Holds information from a Prodoc record.
86
87 Members:
88 accession Accession number of the record.
89 prosite_refs List of tuples (prosite accession, prosite name).
90 text Free format text.
91 references List of reference objects.
92
93 """
95 self.accession = ''
96 self.prosite_refs = []
97 self.text = ''
98 self.references = []
99
101 """Holds information from a Prodoc citation.
102
103 Members:
104 number Number of the reference. (string)
105 authors Names of the authors.
106 citation Describes the citation.
107
108 """
110 self.number = ''
111 self.authors = ''
112 self.citation = ''
113
115 """Returns one record at a time from a Prodoc file.
116
117 Methods:
118 next Return the next record from the stream, or None.
119
120 """
def __init__(self, handle, parser=None):
    """__init__(self, handle, parser=None)

    Create a new iterator.  handle is a file-like object.  parser
    is an optional Parser object to change the results into another form.
    If set to None, then the raw contents of the file will be returned.

    A DeprecationWarning is issued on every construction.  ValueError is
    raised when type(handle) is neither FileType nor InstanceType.

    """
    import warnings
    # stacklevel=2 makes the warning point at the code that constructs the
    # iterator instead of at this method.
    warnings.warn("Bio.Prosite.Prodoc.Iterator is deprecated; we recommend using the function Bio.Prosite.Prodoc.parse instead. Please contact the Biopython developers at biopython-dev@biopython.org if you cannot use Bio.Prosite.Prodoc.parse instead of Bio.Prosite.Prodoc.Iterator.",
                  DeprecationWarning, stacklevel=2)
    if type(handle) is not FileType and type(handle) is not InstanceType:
        raise ValueError("I expected a file handle or file-like object")
    # Wrap the handle so lines can be pushed back while scanning.
    self._uhandle = File.UndoHandle(handle)
    self._parser = parser
136
138 """next(self) -> object
139
140 Return the next Prodoc record from the file. If no more records,
141 return None.
142
143 """
144 lines = []
145 while 1:
146 line = self._uhandle.readline()
147 if not line:
148 break
149 lines.append(line)
150 if line[:5] == '{END}':
151 break
152
153 if not lines:
154 return None
155
156 data = "".join(lines)
157 if self._parser is not None:
158 return self._parser.parse(File.StringHandle(data))
159 return data
160
162 return iter(self.next, None)
163
165 """Accesses a Prodoc file using a dictionary interface.
166
167 """
168 __filename_key = '__filename'
169
def __init__(self, indexname, parser=None):
    """__init__(self, indexname, parser=None)

    Open a Prodoc Dictionary.

    indexname -- name of the index backing the dictionary; it should
                 have been produced by the index_file function.
    parser    -- optional Parser object used to convert results into
                 another form.  When None, the raw contents of the
                 file are returned.

    """
    self._index = Index.Index(indexname)
    # The index stores the path of the indexed data file under a
    # reserved key; open that file for later lookups.
    data_path = self._index[Dictionary.__filename_key]
    self._handle = open(data_path)
    self._parser = parser
183
185 return len(self._index)
186
194
196 return getattr(self._index, name)
197
199 """Parses Prodoc data into a Record object.
200
201 """
205
def parse(self, handle):
    """Scan handle with the scanner and return the consumer's data.

    handle is passed straight to the scanner's feed method together
    with this parser's consumer; the consumer's ``data`` attribute is
    returned once scanning has finished.

    """
    consumer = self._consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
209
211 """Scans Prodoc-formatted data.
212
213 Tested with:
214 Release 15.0, July 1998
215
216 """
def feed(self, handle, consumer):
    """feed(self, handle, consumer)

    Scan Prodoc data.

    handle   -- file-like object containing the data; it is wrapped in
                a File.UndoHandle unless it already is one.
    consumer -- Consumer object that receives events as each record
                is scanned.

    """
    uhandle = handle
    if not isinstance(uhandle, File.UndoHandle):
        uhandle = File.UndoHandle(handle)

    # Peek rather than read, so a record's first line is still available
    # to _scan_record.
    line = uhandle.peekline()
    while line:
        if is_blank_line(line):
            # Consume and discard blank lines.
            uhandle.readline()
        else:
            self._scan_record(uhandle, consumer)
        line = uhandle.peekline()
240
253
256
261
def _scan_text(self, uhandle, consumer):
    """Send free-text lines to consumer.text until a terminator line.

    Lines are read from uhandle with safe_readline.  Reading stops at
    the first line that either has '[' at position 0 and '] ' at
    positions 3-4, or starts with '{END}'.  That line is pushed back
    onto uhandle with saveline and is not passed to the consumer.

    """
    while 1:
        line = safe_readline(uhandle)
        # Compare slices rather than line[3] / line[4]: a short line that
        # starts with '[' (e.g. '[1\n') is ordinary text and must not
        # raise IndexError.
        is_reference = line[:1] == '[' and line[3:5] == '] '
        if is_reference or line[:5] == '{END}':
            uhandle.saveline(line)
            break
        consumer.text(line)
270
278
280
281
282 read_and_call_while(uhandle, consumer.noevent, blank=1)
283 if attempt_read_and_call(uhandle, consumer.noevent, start='+----'):
284 read_and_call_until(uhandle, consumer.noevent, start='+----')
285 read_and_call(uhandle, consumer.noevent, start='+----')
286 read_and_call_while(uhandle, consumer.noevent, blank=1)
287
289 """Consumer that converts a Prodoc record to a Record object.
290
291 Members:
292 data Record with Prodoc data.
293
294 """
297
300
303
305 line = line.rstrip()
306 if line[0] != '{' or line[-1] != '}':
307 raise ValueError("I don't understand accession line\n%s" % line)
308 acc = line[1:-1]
309 if acc[:4] != 'PDOC':
310 raise ValueError("Invalid accession in line\n%s" % line)
311 self.data.accession = acc
312
314 line = line.rstrip()
315 if line[0] != '{' or line[-1] != '}':
316 raise ValueError("I don't understand accession line\n%s" % line)
317 acc, name = line[1:-1].split('; ')
318 self.data.prosite_refs.append((acc, name))
319
def text(self, line):
    """Append line to the text accumulated on self.data."""
    self.data.text += line
322
324 if line[0] == '[' and line[3] == ']':
325 self._ref = Reference()
326 self._ref.number = line[1:3].strip()
327 if line[1] == 'E':
328
329
330 self._ref.citation = line[4:].strip()
331 else:
332 self._ref.authors = line[4:].strip()
333 self.data.references.append(self._ref)
334 elif line[:4] == ' ':
335 if not self._ref:
336 raise ValueError("Unnumbered reference lines\n%s" % line)
337 self._ref.citation = self._ref.citation + line[5:]
338 else:
339 raise Exception("I don't understand the reference line\n%s" % line)
340
346
def index_file(filename, indexname, rec2key=None):
    """index_file(filename, indexname, rec2key=None)

    Index a Prodoc file.  filename is the name of the file.
    indexname is the name of the dictionary.  rec2key is an
    optional callback that takes a Record and generates a unique key
    for the record.  If not specified, the record's accession is used.

    Raises ValueError if filename does not exist, and KeyError if a
    record yields an empty key or a key that is already in the index.

    """
    import os
    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    # Record which data file this index describes, under the reserved key
    # that Dictionary reads back.
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    try:
        end = long(0)
        for record in parse(handle):
            # Each record spans from where the previous one ended to the
            # handle position after this one was parsed.
            start = end
            end = long(handle.tell())
            length = end - start

            if rec2key is not None:
                key = rec2key(record)
            else:
                key = record.accession

            if not key:
                raise KeyError("empty key was produced")
            elif key in index:
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        # Close the data file on both success and failure.
        handle.close()
383