Package Bio :: Package Prosite :: Module Prodoc
[hide private]
[frames | no frames]

Source Code for Module Bio.Prosite.Prodoc

  1  # Copyright 2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module is OBSOLETE. 
  8  Most of the functionality in this module has moved to Bio.ExPASy.Prodoc; 
  9  please see 
 10   
 11  Bio.ExPASy.Prodoc.read          To read a Prodoc file containing one entry. 
 12  Bio.ExPASy.Prodoc.parse         Iterates over entries in a Prodoc file. 
 13  Bio.ExPASy.Prodoc.Record        Holds Prodoc data. 
 14  Bio.ExPASy.Prodoc.Reference     Holds data from a Prodoc reference. 
 15   
 16  The other functions and classes in Bio.Prosite.Prodoc (including 
 17  Bio.Prosite.Prodoc.index_file and Bio.Prosite.Prodoc.Dictionary) are 
 18  considered deprecated, and were not moved to Bio.ExPASy.Prodoc. If you use 
 19  this functionality, please contact the Biopython developers at 
 20  biopython-dev@biopython.org to avoid permanent removal of this module from 
 21  Biopython. 
 22   
 23   
 24   
 25   
 26  This module provides code to work with the prosite.doc file from 
 27  Prosite, available at http://www.expasy.ch/prosite/. 
 28   
 29  Tested with: 
 30  Release 15.0, July 1998 
 31  Release 16.0, July 1999 
 32  Release 20.22, 13 November 2007 
 33   
 34   
 35  Functions: 
 36  parse              Iterates over entries in a Prodoc file. 
 37  index_file         Index a Prodoc file for a Dictionary. 
 38  _extract_record    Extract Prodoc data from a web page. 
 39   
 40   
 41  Classes: 
 42  Record             Holds Prodoc data. 
 43  Reference          Holds data from a Prodoc reference. 
 44  Dictionary         Accesses a Prodoc file using a dictionary interface. 
 45  RecordParser       Parses a Prodoc record into a Record object. 
 46   
 47  _Scanner           Scans Prodoc-formatted data. 
 48  _RecordConsumer    Consumes Prodoc data to a Record object. 
 49  Iterator           Iterates over entries in a Prodoc file; DEPRECATED. 
 50  """ 
 51   
 52  from types import * 
 53  import os 
 54  import sgmllib 
 55  from Bio import File 
 56  from Bio import Index 
 57  from Bio.ParserSupport import * 
 58   
def parse(handle):
    """Iterate over the records in a Prodoc file.

    handle is any iterable yielding the lines of a Prodoc file (for
    example an open file).  Lines are accumulated until one starting with
    '{END}' is seen; the accumulated text is then handed to a RecordParser
    and the object it returns is yielded.

    Lines that follow the last '{END}' line are dropped without being
    parsed.

    """
    import cStringIO
    parser = RecordParser()
    # Collect the lines in a list and join them once per record; growing a
    # string with += copies the whole buffer again for every line.
    lines = []
    for line in handle:
        lines.append(line)
        if line[:5] == '{END}':
            # Use a separate name for the per-record handle so the
            # iterable being looped over is not shadowed.
            record_handle = cStringIO.StringIO("".join(lines))
            record = parser.parse(record_handle)
            lines = []
            yield record
70
def read(handle):
    """Read a single Prodoc record from handle and return it.

    The handle is parsed with a RecordParser.  Afterwards the rest of the
    handle is read; if anything is left, ValueError is raised because the
    input held more than one record.

    """
    record = RecordParser().parse(handle)
    # The parser should have consumed everything; leftovers mean extra data.
    if handle.read():
        raise ValueError("More than one Prodoc record found")
    return record
79 80 81 # It may be a good idea to rewrite read(), parse() at some point to avoid 82 # using the old-style "parser = RecordParser(); parser.parse(handle)" approach. 83
class Record:
    """Container for the data of one Prodoc record.

    Members:
    accession      Accession number of the record.
    prosite_refs   List of tuples (prosite accession, prosite name).
    text           Free format text.
    references     List of reference objects.

    """
    def __init__(self):
        # Every field starts out empty; the two lists are distinct objects.
        self.accession = self.text = ''
        self.prosite_refs, self.references = [], []
99
class Reference:
    """Container for one citation of a Prodoc record.

    Members:
    number     Number of the reference. (string)
    authors    Names of the authors.
    citation   Describes the citation.

    """
    def __init__(self):
        # All three fields are strings and start out empty.
        self.number = self.authors = self.citation = ''
113
class Iterator:
    """Returns one record at a time from a Prodoc file.

    DEPRECATED: constructing an Iterator issues a DeprecationWarning that
    recommends Bio.Prosite.Prodoc.parse instead.

    Methods:
    next   Return the next record from the stream, or None.

    """
    def __init__(self, handle, parser=None):
        """__init__(self, handle, parser=None)

        Create a new iterator.  handle is a file-like object.  parser
        is an optional Parser object to change the results into another form.
        If set to None, then the raw contents of the file will be returned.

        Raises ValueError if handle does not look like a file.

        """
        import warnings
        warnings.warn("Bio.Prosite.Prodoc.Iterator is deprecated; we "
                      "recommend using the function "
                      "Bio.Prosite.Prodoc.parse instead. Please contact "
                      "the Biopython developers at "
                      "biopython-dev@biopython.org if you cannot use "
                      "Bio.Prosite.Prodoc.parse instead of "
                      "Bio.Prosite.Prodoc.Iterator.",
                      DeprecationWarning)
        # Real files and old-style instances are accepted as before.  In
        # addition accept anything that offers readline(), so file-like
        # objects of other types (e.g. cStringIO handles) are not rejected.
        if not (isinstance(handle, (FileType, InstanceType))
                or hasattr(handle, "readline")):
            raise ValueError("I expected a file handle or file-like object")
        self._uhandle = File.UndoHandle(handle)
        self._parser = parser

    def next(self):
        """next(self) -> object

        Return the next Prodoc record from the file.  If no more records,
        return None.

        The record is returned as raw text when no parser was given,
        otherwise as whatever the parser's parse() method returns.

        """
        lines = []
        while 1:
            line = self._uhandle.readline()
            if not line:
                # End of input.
                break
            lines.append(line)
            if line[:5] == '{END}':
                # '{END}' terminates the current record.
                break

        if not lines:
            return None

        data = "".join(lines)
        if self._parser is not None:
            return self._parser.parse(File.StringHandle(data))
        return data

    def __iter__(self):
        # Call next() repeatedly until it returns None.
        return iter(self.next, None)
163
class Dictionary:
    """Dictionary-style access to the records of an indexed Prodoc file.

    """
    # Index key under which the name of the indexed file is stored.
    __filename_key = '__filename'

    def __init__(self, indexname, parser=None):
        """__init__(self, indexname, parser=None)

        Open a Prodoc Dictionary.  indexname is the name of the index for
        the dictionary; it should have been created with the index_file
        function.  parser is an optional Parser object used to convert the
        raw record text into another form.  If it is None, the raw text of
        the record is returned.

        """
        index = Index.Index(indexname)
        self._index = index
        # The index remembers which data file it was built from.
        self._handle = open(index[Dictionary.__filename_key])
        self._parser = parser

    def __len__(self):
        return len(self._index)

    def __getitem__(self, key):
        # Each index entry is a (start offset, length) pair.
        offset, size = self._index[key]
        self._handle.seek(offset)
        raw = self._handle.read(size)
        if self._parser is None:
            return raw
        return self._parser.parse(File.StringHandle(raw))

    def __getattr__(self, name):
        # Any attribute not found on this object is looked up on the index.
        return getattr(self._index, name)
197
class RecordParser(AbstractParser):
    """Parser that turns Prodoc data into a Record object.

    """
    def __init__(self):
        # The scanner emits events; the consumer collects them.
        self._consumer = _RecordConsumer()
        self._scanner = _Scanner()

    def parse(self, handle):
        """Scan handle and return the consumer's resulting data."""
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
209
class _Scanner:
    """Scans Prodoc-formatted data.

    Tested with:
    Release 15.0, July 1998

    """
    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Feed in Prodoc data for scanning.  handle is a file-like
        object that contains prosite data.  consumer is a
        Consumer object that will receive events as the report is scanned.

        """
        # Wrap the handle unless it already is an UndoHandle, so that
        # lines can be peeked at and pushed back.
        if isinstance(handle, File.UndoHandle):
            uhandle = handle
        else:
            uhandle = File.UndoHandle(handle)

        while 1:
            line = uhandle.peekline()
            if not line:
                break
            elif is_blank_line(line):
                # Skip blank lines between records
                uhandle.readline()
                continue
            else:
                self._scan_record(uhandle, consumer)

    def _scan_record(self, uhandle, consumer):
        """Scan one record, bracketed by start_record/end_record events."""
        consumer.start_record()

        self._scan_accession(uhandle, consumer)
        self._scan_prosite_refs(uhandle, consumer)
        read_and_call(uhandle, consumer.noevent, start='{BEGIN}')
        self._scan_text(uhandle, consumer)
        self._scan_refs(uhandle, consumer)
        # NOTE(review): _scan_copyright is not defined in the part of the
        # class visible here -- confirm it exists in the full file.
        self._scan_copyright(uhandle, consumer)
        read_and_call(uhandle, consumer.noevent, start='{END}')

        consumer.end_record()

    def _scan_accession(self, uhandle, consumer):
        """Pass the '{PDOC...' line to consumer.accession."""
        read_and_call(uhandle, consumer.accession, start='{PDOC')

    def _scan_prosite_refs(self, uhandle, consumer):
        """Pass each leading '{PS...' line to consumer.prosite_reference."""
        while attempt_read_and_call(uhandle, consumer.prosite_reference,
                                    start='{PS'):
            pass

    def _scan_text(self, uhandle, consumer):
        """Pass text lines to consumer.text until a reference or '{END}'.

        The terminating line is pushed back onto the handle.

        """
        while 1:
            # presumably safe_readline raises at end of input rather than
            # returning an empty string -- TODO confirm
            line = safe_readline(uhandle)
            # A reference starts like '[ 1] ' or '[E1] '.  Slices are used
            # instead of indexing so that a short line beginning with '['
            # is treated as text rather than raising IndexError.
            if (line[:1] == '[' and line[3:5] == '] ') or \
               line[:5] == '{END}':
                uhandle.saveline(line)
                break
            consumer.text(line)

    def _scan_refs(self, uhandle, consumer):
        """Pass reference lines to consumer.reference.

        Stops at a blank line or at '{END}'; the terminating line is pushed
        back onto the handle.

        """
        while 1:
            line = safe_readline(uhandle)
            if line[:5] == '{END}' or is_blank_line(line):
                uhandle.saveline(line)
                break
            consumer.reference(line)
278
287
class _RecordConsumer(AbstractConsumer):
    """Consumer that converts a Prodoc record to a Record object.

    Members:
    data    Record with Prodoc data.

    """
    def __init__(self):
        self.data = None
        # Reference currently being filled in by reference(); None until
        # the first numbered reference line of a record has been seen.
        self._ref = None

    def start_record(self):
        self.data = Record()
        # Do not let continuation lines attach to a reference that belongs
        # to the previous record.
        self._ref = None

    def end_record(self):
        self._clean_data()

    def accession(self, line):
        """Store the accession from a line of the form '{PDOCnnnnn}'.

        Raises ValueError if the line is not enclosed in braces or the
        enclosed text does not start with 'PDOC'.

        """
        line = line.rstrip()
        # Slices rather than indexing: an empty line must produce the
        # ValueError below, not an IndexError.
        if line[:1] != '{' or line[-1:] != '}':
            raise ValueError("I don't understand accession line\n%s" % line)
        acc = line[1:-1]
        if acc[:4] != 'PDOC':
            raise ValueError("Invalid accession in line\n%s" % line)
        self.data.accession = acc

    def prosite_reference(self, line):
        """Append (accession, name) from a line of the form '{acc; name}'.

        Raises ValueError if the line is not enclosed in braces, or if its
        content does not split into exactly two parts on '; '.

        """
        line = line.rstrip()
        if line[:1] != '{' or line[-1:] != '}':
            raise ValueError(
                "I don't understand prosite reference line\n%s" % line)
        acc, name = line[1:-1].split('; ')
        self.data.prosite_refs.append((acc, name))

    def text(self, line):
        """Append line to the free-format text of the record."""
        self.data.text = self.data.text + line

    def reference(self, line):
        """Handle one line of the reference section.

        A line starting with a bracketed two-character number begins a new
        Reference; a line indented by four spaces continues the citation of
        the current one.  Anything else raises ValueError.

        """
        if line[:1] == '[' and line[3:4] == ']':  # new reference
            self._ref = Reference()
            self._ref.number = line[1:3].strip()
            if line[1] == 'E':
                # If it's an electronic reference, then the URL is on the
                # line, instead of the author.
                self._ref.citation = line[4:].strip()
            else:
                self._ref.authors = line[4:].strip()
            self.data.references.append(self._ref)
        elif line[:4] == '    ':
            # The four-character slice has to be compared with four spaces;
            # a single space could never match a continuation line.
            if self._ref is None:
                raise ValueError("Unnumbered reference lines\n%s" % line)
            self._ref.citation = self._ref.citation + line[5:]
        else:
            # ValueError (a subclass of Exception) matches the other
            # handlers of this class.
            raise ValueError(
                "I don't understand the reference line\n%s" % line)

    def _clean_data(self):
        # get rid of trailing newlines
        for ref in self.data.references:
            ref.citation = ref.citation.rstrip()
            ref.authors = ref.authors.rstrip()
346
def index_file(filename, indexname, rec2key=None):
    """index_file(filename, indexname, rec2key=None)

    Index a Prodoc file.  filename is the name of the file.
    indexname is the name of the dictionary.  rec2key is an
    optional callback that takes a Record and generates a unique key
    for the record.  If not specified, the accession of the record is
    used as the key.

    For every record the index stores a (start, length) pair giving the
    position of the record in the file.

    Raises ValueError if filename does not exist, and KeyError if a record
    yields an empty key or a key that is already in the index.

    """
    import os
    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    try:
        # Feed parse() through readline() instead of the file's own
        # iterator.  File iteration reads ahead, so handle.tell() would not
        # reflect the end of the record that has just been yielded.
        records = parse(iter(handle.readline, ''))
        end = long(0)
        for record in records:
            # A record starts where the previous one ended.
            start = end
            end = long(handle.tell())
            length = end - start

            if rec2key is not None:
                key = rec2key(record)
            else:
                key = record.accession

            if not key:
                raise KeyError("empty key was produced")
            elif key in index:
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        # Close the data file even if parsing or indexing fails.
        handle.close()
383