Trees | Indices | Help |
---|
|
1 # Copyright 2001 by Katharine Lindner. All rights reserved. 2 # This code is part of the Biopython distribution and governed by its 3 # license. Please see the LICENSE file that should have been included 4 # as part of this package. 5 6 """ 7 This module provides code to work with html files from InterPro, 8 and code to access resources at InterPro over the WWW. 9 http://www.ebi.ac.uk/interpro/ 10 11 12 Classes: 13 Record Holds interpro sequence data. 14 InterProParser Parses interpro sequence data into a Record object. 15 16 Functions: 17 get_interpro_entry 18 19 """ 20 21 from Bio import File 22 import sgmllib 23 from Bio.SeqFeature import Reference 24265328 keys = self.keys() 29 keys.sort() 30 out = '' 31 for key in keys: 32 val = self[ key ] 33 if key == 'References': 34 out = out + '\n%s\n' % key 35 for reference in val: 36 out = out + '%s\n' % str( reference ) 37 out = out + '\n' 38 elif key == 'Examples': 39 out = out + '\n%s\n' % key 40 for example in val: 41 out = out + '%s\n' % example 42 elif key == 'Abstract': 43 out = out + '\n%s\n' % key 44 out = out + '%s...\n' % val[ : 80 ] 45 elif type( self[ key ] ) == list: 46 out = out + '\n%s\n' % key 47 for item in val: 48 out = out + '%s\n' % item 49 50 else: 51 out = out + '%s: %s\n' % ( key, self[ key ] ) 52 return out55 """Parses InterPro sequence data into a Record object. 56 57 """249 254 262 263 if __name__ == '__main__': 264 import Bio.File 265 handle = open('IPR001064.htm') 266 undo_handle = Bio.File.UndoHandle( handle ) 267 interpro_parser = InterProParser() 268 record = interpro_parser.parse( handle ) 269 print str( record ) 27059 sgmllib.SGMLParser.reset( self ) 60 self.text = '' 61 self.inter_pro_dict = Record() 62 self.inter_pro_dict['Database'] = '' 63 self.inter_pro_dict['Accession'] = '' 64 self.inter_pro_dict['Name'] = '' 65 self.inter_pro_dict['Dates'] = '' 66 self.inter_pro_dict['Type'] = '' 67 self.inter_pro_dict['Parent'] = '' 68 self.inter_pro_dict['Process'] = '' 69 self.inter_pro_dict['Function'] = '' 70 self.inter_pro_dict['Component'] = '' 71 self.inter_pro_dict['Signatures'] = [] 72 self.inter_pro_dict['Abstract'] = '' 73 self.inter_pro_dict['Examples'] = [] 74 self.inter_pro_dict['References'] = [] 75 self.inter_pro_dict['Database links'] = [] 76 self._state = 'title' 77 self._reference_state = '' 78 self._key_waiting = '' 79 self._current_reference = ''80 8587 """feed(self, handle ) 88 89 Feed in interpro data for scanning. handle is a file-like object 90 containing interpro data. consumer is a Consumer object that will 91 receive events as the ndb data is scanned. 92 93 """ 94 if isinstance(handle, File.UndoHandle): 95 uhandle = handle 96 else: 97 uhandle = File.UndoHandle(handle) 98 text = '' 99 while 1: 100 line = uhandle.readline() 101 if not line: 102 break 103 line = line.strip() 104 if line[ -7: ] == '</HTML>': 105 break 106 text = text + ' ' + line 107 108 sgmllib.SGMLParser.feed( self, text )109 110 114 119 122 125127 dictionary = dict( attrs ) 128 if self._state == 'chugging_along': 129 if 'class' in dictionary: 130 if dictionary['class'] == 'tag': 131 self._state = 'waiting_tag' 132 self._flush_text() 133 elif dictionary['class'] == 'inf': 134 self._state = 'waiting_inf' 135 self._flush_text()136138 if self._state == 'waiting_tag': 139 self._key_waiting = self._flush_text() 140 self._state = 'chugging_along' 141 elif self._state == 'waiting_inf': 142 key = self._key_waiting 143 if key in self.inter_pro_dict: 144 val = self._flush_text() 145 if key == 'Signatures': 146 pass 147 elif key == 'Database links': 148 pass 149 else: 150 self.inter_pro_dict[ key ] = val 151 self._key_waiting = '' 152 self._state = 'chugging_along'153 154 159 163165 if self._key_waiting == 'References': 166 self._state = 'references' 167 self._reference_state = 'pubmed_id' 168 self._flush_text() 169 self._references = []170172 if self._state == 'references': 173 self._references.append( self._current_reference ) 174 self.inter_pro_dict['References'] = self._references 175 self._state = 'chugging_along'176178 if self._state == 'references': 179 self._reference_state = 'pubmed_id' 180 self._flush_text() 181 if( self._current_reference != '' ): 182 self._references.append( self._current_reference ) 183 self._current_reference = Reference()184186 if self._state == 'examples': 187 text = self._flush_text() 188 self.inter_pro_dict['Examples'].append( text )189191 dictionary = dict( attrs ) 192 if self._state == 'references': 193 if self._reference_state == 'pubmed_id': 194 if 'name' in dictionary: 195 self._current_reference.pubmed_id = dictionary['name'] 196 self._reference_state = 'authors' 197 elif self._reference_state == 'journal': 198 self._current_reference.journal = self._flush_text() 199 self._reference_state = 'medline_id'200202 if self._state == 'references': 203 if self._reference_state == 'medline_id': 204 text = self._flush_text() 205 cols = text.split( ':' ) 206 try: 207 medline_id = cols[ 1 ] 208 except IndexError: 209 medline_id = None 210 else: 211 medline_id = medline_id[ : -1 ] 212 self._current_reference.medline_id = medline_id213215 if self._state == 'references': 216 if self._reference_state == 'authors': 217 self._current_reference.authors = self._flush_text() 218 self._reference_state = 'title' 219 elif self._key_waiting == 'Signatures': 220 self.inter_pro_dict['Signatures'].append( self._flush_text() ) 221 elif self._key_waiting == 'Database links': 222 self.inter_pro_dict['Database links'].append( self._flush_text() )223 226228 if self._state == 'references': 229 if self._reference_state == 'title': 230 text = self._flush_text() 231 self._current_reference.title = text 232 self._reference_state = 'journal'233 234236 if self._state == 'references': 237 if tag == 'li': 238 self.stack.pop() 239 elif tag == 'a': 240 if self._reference_state == 'pubmed_id': 241 self.stack.pop() 242 method(attrs)243 244
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Sun May 3 15:51:46 2009 | http://epydoc.sourceforge.net |