Package Bio :: Package Prosite :: Module Prodoc
[hide private]
[frames] | no frames]

Source Code for Module Bio.Prosite.Prodoc

  1  # Copyright 2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module is OBSOLETE. 
  8  Most of the functionality in this module has moved to Bio.ExPASy.Prodoc; 
  9  please see 
 10   
 11  Bio.ExPASy.Prodoc.read          To read a Prodoc file containing one entry. 
 12  Bio.ExPASy.Prodoc.parse         Iterates over entries in a Prodoc file. 
 13  Bio.ExPASy.Prodoc.Record        Holds Prodoc data. 
 14  Bio.ExPASy.Prodoc.Reference     Holds data from a Prodoc reference. 
 15   
 16  The other functions and classes in Bio.Prosite.Prodoc (including 
 17  Bio.Prosite.Prodoc.index_file and Bio.Prosite.Prodoc.Dictionary) are 
 18  considered deprecated, and were not moved to Bio.ExPASy.Prodoc. If you use 
 19  this functionality, please contact the Biopython developers at 
 20  biopython-dev@biopython.org to avoid permanent removal of this module from 
 21  Biopython. 
 22   
 23   
 24   
 25   
 26  This module provides code to work with the prosite.doc file from 
 27  Prosite, available at http://www.expasy.ch/prosite/. 
 28   
 29  Tested with: 
 30  Release 15.0, July 1998 
 31  Release 16.0, July 1999 
 32  Release 20.22, 13 November 2007 
 33   
 34   
 35  Functions: 
 36  parse              Iterates over entries in a Prodoc file. 
 37  index_file         Index a Prodoc file for a Dictionary. 
 38  _extract_record    Extract Prodoc data from a web page. 
 39   
 40   
 41  Classes: 
 42  Record             Holds Prodoc data. 
 43  Reference          Holds data from a Prodoc reference. 
 44  Dictionary         Accesses a Prodoc file using a dictionary interface. 
 45  RecordParser       Parses a Prodoc record into a Record object. 
 46   
 47  _Scanner           Scans Prodoc-formatted data. 
 48  _RecordConsumer    Consumes Prodoc data to a Record object. 
 49  """ 
 50   
 51  from types import * 
 52  import os 
 53  import sgmllib 
 54  from Bio import File 
 55  from Bio import Index 
 56  from Bio.ParserSupport import * 
 57   
def parse(handle):
    """Iterate over the records in a Prodoc file.

    handle is consumed line by line.  Lines are buffered until one that
    begins with '{END}' is seen; the buffered text is then handed to a
    RecordParser and the object it returns is yielded.  Lines that follow
    the last '{END}' line are buffered but never parsed.

    """
    import cStringIO
    record_parser = RecordParser()
    pending = []
    for line in handle:
        pending.append(line)
        if not line.startswith('{END}'):
            continue
        # A complete record has been collected: parse it from an
        # in-memory handle and start buffering the next one.
        chunk = cStringIO.StringIO("".join(pending))
        pending = []
        yield record_parser.parse(chunk)
69
def read(handle):
    """Parse a handle that is expected to hold a single Prodoc record.

    The handle is passed to RecordParser.parse and the result is returned.
    If handle.read() still returns data afterwards, a ValueError is raised.

    """
    record = RecordParser().parse(handle)
    # Nothing should be left to read once the record has been parsed.
    if handle.read():
        raise ValueError("More than one Prodoc record found")
    return record
# It may be a good idea to rewrite read() and parse() at some point to avoid
# using the old-style "parser = RecordParser(); parser.parse(handle)" approach.
class Record:
    """Container for the data of one Prodoc record.

    Members:
    accession      Accession number of the record.
    prosite_refs   List of tuples (prosite accession, prosite name).
    text           Free format text.
    references     List of reference objects.

    """
    def __init__(self):
        # Every record starts out empty; the lists are created per
        # instance so that records never share them.
        self.accession, self.prosite_refs = '', []
        self.text, self.references = '', []
98
class Reference:
    """Container for one citation of a Prodoc record.

    Members:
    number      Number of the reference. (string)
    authors     Names of the authors.
    citation    Describes the citation.

    """
    def __init__(self):
        # All three fields are strings and start out empty.
        self.number = self.authors = self.citation = ''
112
class Dictionary:
    """Accesses a Prodoc file using a dictionary interface.

    Records are looked up by key through an index; attributes that are not
    defined on this class are delegated to the underlying index object.

    """
    # Reserved index key under which the name of the data file is stored.
    __filename_key = '__filename'

    def __init__(self, indexname, parser=None):
        """__init__(self, indexname, parser=None)

        Open a Prodoc Dictionary.  indexname is the name of the
        index for the dictionary.  The index should have been created
        using the index_file function.  parser is an optional Parser
        object to change the results into another form.  If set to None,
        then the raw contents of the file will be returned.

        """
        self._index = Index.Index(indexname)
        self._handle = open(self._index[Dictionary.__filename_key])
        self._parser = parser

    def __len__(self):
        """Return the number of entries in the underlying index."""
        # NOTE(review): the index also holds the reserved file-name entry,
        # so this presumably counts one more than the number of records --
        # confirm before relying on it.
        return len(self._index)

    def __getitem__(self, key):
        """Return the record stored under key.

        The index maps key to a (start, length) pair; that slice of the
        data file is read and returned as is, or passed through the parser
        given to the constructor when there is one.

        """
        start, length = self._index[key]
        self._handle.seek(start)
        data = self._handle.read(length)
        if self._parser is not None:
            return self._parser.parse(File.StringHandle(data))
        return data

    def __getattr__(self, name):
        """Delegate unknown attributes to the underlying index object."""
        # __getattr__ only runs when normal lookup has failed.  If '_index'
        # itself is missing (e.g. __init__ did not complete), delegating
        # through self._index would re-enter this method without end.
        if name == '_index':
            raise AttributeError(name)
        return getattr(self._index, name)
146
class RecordParser(AbstractParser):
    """Turns Prodoc data into a Record object.

    A _Scanner reads the data and reports it to a _RecordConsumer, whose
    data attribute is handed back to the caller.

    """
    def __init__(self):
        self._scanner, self._consumer = _Scanner(), _RecordConsumer()

    def parse(self, handle):
        """Scan handle and return the consumer's data attribute."""
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
158
class _Scanner:
    """Scans Prodoc-formatted data.

    Tested with:
    Release 15.0, July 1998

    """
    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Feed in Prodoc data for scanning.  handle is a file-like
        object that contains prosite data.  consumer is a
        Consumer object that will receive events as the report is scanned.

        """
        if isinstance(handle, File.UndoHandle):
            uhandle = handle
        else:
            uhandle = File.UndoHandle(handle)

        while 1:
            line = uhandle.peekline()
            if not line:
                break
            elif is_blank_line(line):
                # Skip blank lines between records
                uhandle.readline()
                continue
            else:
                self._scan_record(uhandle, consumer)

    def _scan_record(self, uhandle, consumer):
        """Scan one record, from its accession line through '{END}'."""
        consumer.start_record()

        self._scan_accession(uhandle, consumer)
        self._scan_prosite_refs(uhandle, consumer)
        read_and_call(uhandle, consumer.noevent, start='{BEGIN}')
        self._scan_text(uhandle, consumer)
        self._scan_refs(uhandle, consumer)
        self._scan_copyright(uhandle, consumer)
        read_and_call(uhandle, consumer.noevent, start='{END}')

        consumer.end_record()

    def _scan_accession(self, uhandle, consumer):
        """Report the line starting with '{PDOC' to consumer.accession."""
        read_and_call(uhandle, consumer.accession, start='{PDOC')

    def _scan_prosite_refs(self, uhandle, consumer):
        """Report each consecutive line starting with '{PS'."""
        while attempt_read_and_call(uhandle, consumer.prosite_reference,
                                    start='{PS'):
            pass

    def _scan_text(self, uhandle, consumer):
        """Report free-text lines until a reference or '{END}' line.

        The terminating line is pushed back onto the handle.

        """
        while 1:
            line = safe_readline(uhandle)
            # Slices rather than indexing, so that a short line starting
            # with '[' is treated as text instead of raising IndexError.
            if (line[:1] == '[' and line[3:5] == '] ') or \
               line[:5] == '{END}':
                uhandle.saveline(line)
                break
            consumer.text(line)

    def _scan_refs(self, uhandle, consumer):
        """Report reference lines until a blank line or '{END}' line.

        The terminating line is pushed back onto the handle.

        """
        while 1:
            line = safe_readline(uhandle)
            if line[:5] == '{END}' or is_blank_line(line):
                uhandle.saveline(line)
                break
            consumer.reference(line)

    def _scan_copyright(self, uhandle, consumer):
        """Skip blank lines and an optional boxed notice before '{END}'.

        _scan_refs stops on a blank line and pushes it back, while
        _scan_record expects '{END}' next, so whatever sits in between is
        consumed here: blank lines, then optionally a block opened and
        closed by lines starting with '+----', then blank lines again.

        """
        # NOTE(review): _scan_record calls this method but its definition
        # was missing from this copy of the module; this body is a
        # reconstruction -- confirm against the upstream Biopython source.
        read_and_call_while(uhandle, consumer.noevent, blank=1)
        if attempt_read_and_call(uhandle, consumer.noevent, start='+----'):
            read_and_call_until(uhandle, consumer.noevent, start='+----')
            read_and_call(uhandle, consumer.noevent, start='+----')
        read_and_call_while(uhandle, consumer.noevent, blank=1)
227
236
class _RecordConsumer(AbstractConsumer):
    """Consumer that converts a Prodoc record to a Record object.

    Members:
    data    Record with Prodoc data.

    """
    def __init__(self):
        self.data = None
        # Reference currently being filled in; None until the first
        # numbered reference line of a record has been seen.
        self._ref = None

    def start_record(self):
        """Begin a new, empty Record."""
        self.data = Record()
        # Do not let continuation lines attach to the previous record's
        # last reference.
        self._ref = None

    def end_record(self):
        """Finish the current record by tidying its references."""
        self._clean_data()

    def accession(self, line):
        """Store the accession from a '{PDOC...}' line.

        Raises ValueError if the line is not wrapped in braces or the
        content does not start with 'PDOC'.

        """
        line = line.rstrip()
        # Slices, so that an empty line is reported as a ValueError
        # rather than an IndexError.
        if line[:1] != '{' or line[-1:] != '}':
            raise ValueError("I don't understand accession line\n%s" % line)
        acc = line[1:-1]
        if acc[:4] != 'PDOC':
            raise ValueError("Invalid accession in line\n%s" % line)
        self.data.accession = acc

    def prosite_reference(self, line):
        """Append an (accession, name) tuple from a '{acc; name}' line.

        Raises ValueError if the line is not wrapped in braces or does not
        split into exactly two parts on '; '.

        """
        line = line.rstrip()
        if line[:1] != '{' or line[-1:] != '}':
            raise ValueError(
                "I don't understand prosite reference line\n%s" % line)
        acc, name = line[1:-1].split('; ')
        self.data.prosite_refs.append((acc, name))

    def text(self, line):
        """Append a line to the record's free text."""
        self.data.text = self.data.text + line

    def reference(self, line):
        """Handle one line of the reference section.

        A line of the form '[nn] ...' starts a new Reference; a line
        indented by four spaces continues the citation of the current one.
        Raises ValueError for a continuation line that precedes any
        numbered reference and for any other kind of line.

        """
        if line[:1] == '[' and line[3:4] == ']':  # new reference
            self._ref = Reference()
            self._ref.number = line[1:3].strip()
            if line[1] == 'E':
                # If it's an electronic reference, then the URL is on the
                # line, instead of the author.
                self._ref.citation = line[4:].strip()
            else:
                self._ref.authors = line[4:].strip()
            self.data.references.append(self._ref)
        elif line[:4] == '    ':
            # The four-character slice must be compared with four spaces;
            # a shorter literal can never match a continuation line.
            if self._ref is None:
                raise ValueError("Unnumbered reference lines\n%s" % line)
            self._ref.citation = self._ref.citation + line[5:]
        else:
            # ValueError (a subclass of Exception) matches the other
            # format errors raised by this class.
            raise ValueError(
                "I don't understand the reference line\n%s" % line)

    def _clean_data(self):
        """Strip trailing whitespace from every reference's fields."""
        # get rid of trailing newlines
        for ref in self.data.references:
            ref.citation = ref.citation.rstrip()
            ref.authors = ref.authors.rstrip()
295
def index_file(filename, indexname, rec2key=None):
    """index_file(filename, indexname, rec2key=None)

    Index a Prodoc file.  filename is the name of the file.
    indexname is the name of the dictionary.  rec2key is an
    optional callback that takes a Record and generates a unique key
    (e.g. the accession number) for the record.  If not specified,
    the record's accession will be used.

    Each key is mapped to a (start, length) pair describing where the
    record sits in the file.  Raises ValueError if filename does not
    exist, and KeyError if a key is empty or occurs more than once.

    """
    import os
    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    try:
        # Feed parse() through readline() rather than letting it iterate
        # over the file object: file iteration reads ahead, which makes
        # handle.tell() useless for locating the end of a record.
        records = parse(iter(handle.readline, ''))
        end = 0
        for record in records:
            start = end
            end = handle.tell()
            length = end - start

            if rec2key is not None:
                key = rec2key(record)
            else:
                key = record.accession

            if not key:
                raise KeyError("empty key was produced")
            elif key in index:
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        # Release the data file even when indexing fails part-way.
        handle.close()
332