Package Bio :: Package ExPASy :: Module Prodoc
[hide private]
[frames] | [no frames]

Source Code for Module Bio.ExPASy.Prodoc

  1  # Copyright 2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with the prosite.doc file from 
  8  Prosite. 
  9  http://www.expasy.ch/prosite/ 
 10   
 11  Tested with: 
 12  Release 15.0, July 1998 
 13  Release 16.0, July 1999 
 14  Release 20.22, 13 November 2007 
 15  Release 20.43, 10 February 2009 
 16   
 17   
 18  Functions: 
 19  read               Read a Prodoc file containing exactly one Prodoc entry. 
 20  parse              Iterates over entries in a Prodoc file. 
 21   
 22  Classes: 
 23  Record             Holds Prodoc data. 
 24  Reference          Holds data from a Prodoc reference. 
 25   
 26   
 27  DEPRECATED functions: 
 28  index_file         Index a Prodoc file for a Dictionary. 
 29  _extract_record    Extract Prodoc data from a web page. 
 30   
 31  DEPRECATED classes: 
 32  Dictionary         Accesses a Prodoc file using a dictionary interface. 
 33  RecordParser       Parses a Prodoc record into a Record object. 
 34  _Scanner           Scans Prodoc-formatted data. 
 35  _RecordConsumer    Consumes Prodoc data to a Record object. 
 36  Iterator           Iterates over entries in a Prodoc file. 
 37  """ 
 38   
 39   
def read(handle):
    """Read a Prodoc file containing exactly one Prodoc entry.

    Arguments:
     - handle - a line-iterable, file-like object that also provides
       readline().

    Returns the record produced by the private __read helper for the
    first entry on the handle (None if the handle holds no entry at all).

    Raises ValueError if any further line can be read from the handle
    once the first entry has been consumed.
    """
    record = __read(handle)
    # We should have reached the end of the record by now, so any
    # remaining line means the file holds more than a single entry.
    line = handle.readline()
    if line:
        raise ValueError("More than one Prodoc record found")
    return record
47
def parse(handle):
    """Iterate over the entries in a Prodoc file.

    Arguments:
     - handle - a line-iterable, file-like object.

    This is a generator: it yields one record at a time, as returned by
    the private __read helper, and stops as soon as that helper reports
    that no further entry is available.
    """
    while True:
        record = __read(handle)
        if not record:
            # __read returns None once the handle is exhausted.
            return
        yield record
54
class Record:
    """Holds information from a Prodoc record.

    Members:
    accession      Accession number of the record.
    prosite_refs   List of tuples (prosite accession, prosite name).
    text           Free format text.
    references     List of reference objects.

    """

    def __init__(self):
        """Create an empty record; every member starts out empty."""
        self.accession = ''
        self.prosite_refs = []
        self.text = ''
        self.references = []
70 71
class Reference:
    """Holds information from a Prodoc citation.

    Members:
    number         Number of the reference. (string)
    authors        Names of the authors.
    citation       Describes the citation.

    """

    def __init__(self):
        """Create an empty reference; every member is an empty string."""
        self.number = ''
        self.authors = ''
        self.citation = ''
# Below are private functions
def __read_prosite_reference_line(record, line):
    """Parse one '{accession; name}' line and store it on the record.

    Arguments:
     - record - object whose prosite_refs list receives the new
       (accession, name) tuple.
     - line - the raw line; trailing whitespace is ignored.

    Raises ValueError if the line does not end with '}' (this includes
    an empty or whitespace-only line), or if the text between the first
    and last character does not split into exactly two parts on '; '.
    """
    line = line.rstrip()
    # endswith() rather than line[-1] so that an empty line is reported
    # as a parse error instead of failing with an IndexError.
    if not line.endswith('}'):
        raise ValueError("I don't understand the Prosite reference on line\n%s" % line)
    # Drop the first and last characters (the surrounding braces).
    acc, name = line[1:-1].split('; ')
    record.prosite_refs.append((acc, name))
94
def __read_text_line(record, line):
    """Append a line of free-format text to the record.

    The line is added to record.text unchanged (no stripping).

    Always returns True, which tells the caller to keep using this
    function for the following lines.
    """
    record.text += line
    return True
98
def __read_reference_start(record, line):
    """Create a new Reference from its first line and add it to the record.

    Arguments:
     - record - object whose references list receives the new Reference.
     - line - the first line of the reference; characters 1-2 hold the
       reference number and everything from character 4 onwards holds
       the content.

    The content is stored, stripped, as the citation when the reference
    number starts with 'E', and as the authors otherwise.
    """
    # Read the references
    reference = Reference()
    reference.number = line[1:3].strip()
    if line[1] == 'E':
        # If it's an electronic reference, then the URL is on the
        # line, instead of the author.
        reference.citation = line[4:].strip()
    else:
        reference.authors = line[4:].strip()
    record.references.append(reference)
110
def __read_reference_line(record, line):
    """Add a continuation line to the most recent reference of the record.

    Arguments:
     - record - object whose references list is non-empty; its last
       element is the reference being continued.
     - line - the raw continuation line.

    Returns False for a blank line (the reference block is over) and
    True when the line was consumed.  A consumed line extends the
    authors while the authors text ends with a comma, and extends the
    citation otherwise.

    Raises ValueError (a subclass of the Exception raised previously)
    if the line is neither blank nor indented.
    """
    if not line.strip():
        return False
    reference = record.references[-1]
    # NOTE(review): the slice offsets below (4 and 5) suggest that
    # continuation lines are indented by five columns; the prefix tested
    # here may have lost whitespace when this listing was extracted --
    # confirm against the upstream file.
    if line.startswith(' '):
        # endswith() rather than authors[-1]: authors is empty for an
        # electronic reference, and indexing it would raise IndexError.
        if reference.authors.endswith(','):
            # Keep one column of the indent as the separating space.
            reference.authors += line[4:].rstrip()
        else:
            reference.citation += line[5:]
        return True
    raise ValueError("I don't understand the reference line\n%s" % line)
122 128
def __read(handle):
    """Read the next Prodoc entry from the handle.

    Arguments:
     - handle - a line-iterable, file-like object.

    Returns a Record, or None if the handle is exhausted before any
    line is found that is neither blank nor starts with '//'.

    Raises ValueError if the first significant line is not a
    '{PDOC...}' accession line, if the line following the '{PS...'
    lines does not start with '{BEGIN', or if the handle ends before
    '{BEGIN' or '{END}' has been seen.
    """
    # Skip blank lines (and '//' lines) between records
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            break
    else:
        return None
    record = Record()
    # Read the accession number
    if not line.startswith("{PDOC"):
        raise ValueError("Line does not start with '{PDOC':\n%s" % line)
    if line[-1] != '}':
        raise ValueError("I don't understand accession line\n%s" % line)
    record.accession = line[1:-1]
    # Read the Prosite references
    for line in handle:
        if line.startswith('{PS'):
            __read_prosite_reference_line(record, line)
        else:
            break
    else:
        raise ValueError("Unexpected end of stream.")
    # Read the actual text
    if not line.startswith('{BEGIN'):
        raise ValueError("Line does not start with '{BEGIN':\n%s" % line)
    # read_line is the handler for ordinary lines in the current section;
    # it is swapped as section markers are met and set to None once a
    # handler reports (by returning a false value) that it is finished.
    read_line = __read_text_line
    for line in handle:
        if line.startswith('{END}'):
            # Clean up the record and return
            for reference in record.references:
                reference.citation = reference.citation.rstrip()
                reference.authors = reference.authors.rstrip()
            return record
        elif line[0:1] == '[' and line[3:5] == '] ':
            # Start of a reference.  Slices are used instead of single
            # indexes so that a short line starting with '[' is treated
            # as ordinary content rather than raising IndexError.
            __read_reference_start(record, line)
            read_line = __read_reference_line
        elif line.startswith('+----'):
            read_line = __read_copyright_line
        elif read_line:
            if not read_line(record, line):
                read_line = None
    raise ValueError("Unexpected end of stream.")
172