Package Martel :: Module Parser
[hide private]
[frames | no frames]

Source Code for Module Martel.Parser

  1  # Copyright 2000-2001, Dalke Scientific Software, LLC 
  2  # Distributed under the Biopython License Agreement (see the LICENSE file). 
  3   
  4  """Implement Martel parsers. 
  5   
  6  The classes in this module are used by other Martel modules and not 
  7  typically by external users. 
  8   
  9  There are two major parsers, 'Parser' and 'RecordParser.'  The first 
 10  is the standard one, which parses the file as one string in memory 
 11  then generates the SAX events.  The other reads a record at a time 
 12  using a RecordReader and generates events after each read.  The 
 13  generated event callbacks are identical. 
 14   
 15  At some level, both parsers use "_do_callback" to convert mxTextTools 
 16  tags into SAX events. 
 17   
 18  XXX finish this documentation 
 19   
 20  XXX need a better way to get closer to the likely error position when 
 21  parsing. 
 22   
 23  XXX need to implement Locator 
 24   
 25  """ 
 26   
 27  import urllib, pprint, traceback, sys, string 
 28  from xml.sax import xmlreader, _exceptions, handler, saxutils 
 29   
 30  try: 
 31      from mx import TextTools 
 32  except ImportError: 
 33      import TextTools 
 34   
 35  try: 
 36      from cStringIO import StringIO 
 37  except ImportError: 
 38      from StringIO import StringIO 
 39   
 40  import Dispatch 
 41   
 42  # These exceptions are liable to change in the future 
class ParserException(_exceptions.SAXException):
    """used when a parse cannot be done"""

    def setLocation(self, text):
        """Append the repr() of the offending 'text' to the error message."""
        self._msg += "; in %s" % repr(text)


class ParserPositionException(ParserException):
    """Parse failure at (or somewhere beyond) a character position.

    The position is available as the 'pos' attribute and is embedded in
    the message.  'exc += offset' shifts the position in place, which the
    record-based parsers use to turn a record-relative position into a
    file-relative one.
    """

    def __init__(self, pos):
        ParserException.__init__(
            self, "error parsing at or beyond character %d" % pos, None)
        self.pos = pos

    def __iadd__(self, offset):
        """Shift the position by 'offset' and rebuild the message."""
        self.pos += offset
        self._msg = "error parsing at or beyond character %d" % self.pos
        return self


class ParserIncompleteException(ParserPositionException):
    """Parse succeeded up to 'pos' but unparsed text remains after it."""

    def __init__(self, pos):
        ParserPositionException.__init__(self, pos)
        self._msg += " (unparsed text remains)"

    def __iadd__(self, offset):
        """Shift the position by 'offset', keeping the message suffix."""
        ParserPositionException.__iadd__(self, offset)
        self._msg += " (unparsed text remains)"
        # An in-place operator must return the result; without this,
        # 'exc += offset' would rebind 'exc' to None.
        return self


class ParserRecordException(ParserException):
    """used by the RecordParser when it can't read a record"""
    pass


# Uses a hack to support back references in mxTextTools!

# THIS MEANS SINGLE THREADED SUPPORT for anything using
# backreferences!  There is a much more complicated solution where the
# first element of any taglist is defined to contain the _match_group
# for that parse session.  I don't want to do that, since another
# approach is to modify mxTextTools to pass around an extra state
# object, or to write my own code.  (Did someone say NIH syndrome? :)
_match_group = dict()



# The SAX startElements take an AttributeList as the second argument.
# Martel's attributes can be empty, so make a simple class which
# doesn't do anything and which I can guarantee won't be modified.
class MartelAttributeList(xmlreader.AttributesImpl):
    """Always-empty attribute collection passed to startElement().

    Every accessor is overridden so the object reports no attributes
    regardless of what it was constructed with, and nothing here stores
    state, so one shared instance can be handed to every handler.
    """

    def getLength(self):
        return 0

    def getName(self, i):
        raise IndexError(i)

    def getType(self, i):
        raise IndexError(i)

    def getValue(self, i):
        raise IndexError(i)

    def getNames(self):
        # The inherited version calls .keys() on the constructor argument,
        # which fails for the list the shared instance is built with.
        return []

    def getQNames(self):
        # Overridden for the same reason as getNames().
        return []

    def __len__(self):
        return 0

    def __getitem__(self, key):
        # Integer keys behave like sequence indexing, anything else like
        # a mapping lookup; both always fail because there is no content.
        if type(key) == type(0):
            raise IndexError(key)
        else:
            raise KeyError(key)

    def keys(self):
        return []

    def values(self):
        return []

    def items(self):
        return []

    def has_key(self, key):
        return 0

    def get(self, key, alternative=None):
        """Return 'alternative'; it defaults to None like the base class."""
        return alternative

    def __repr__(self):
        return "{}"

    def __str__(self):
        return "{}"


# singleton object shared among all startElement calls
_attribute_list = MartelAttributeList([])


def _do_callback(s, begin, end, taglist, cont_handler, attrlookup):
    """internal function to convert the tagtable into ContentHandler events

    's' is the input text
    'begin' is the current position in the text
    'end' is 1 past the last position of the text allowed to be parsed
    'taglist' is the tag list from mxTextTools.parse
    'cont_handler' is the SAX ContentHandler
    'attrlookup' is a dict mapping the encoded tag name to the element info
    """
    # bind the handler methods to local names for a slight speedup
    emit_text = cont_handler.characters
    open_element = cont_handler.startElement
    close_element = cont_handler.endElement

    cursor = begin
    for tag, left, right, children in taglist:
        # Text between the current position and the start of this tag
        # is reported as plain characters().
        assert cursor <= left, "begin = %d and l = %d" % (cursor, left)
        if cursor < left:
            emit_text(s[cursor:left])

        special = tag.startswith(">")
        if not special:
            # Normal tags
            open_element(tag, _attribute_list)
        elif tag != ">ignore":
            # Named groups don't create ">ignore" tags, so pass them on
            # to the ContentHandler.  Unnamed groups still need a name so
            # mxTextTools can create subtags for them; they are named
            # ">ignore" and produce no events of their own.
            assert tag.startswith(">G"), "Unknown special tag %s" % repr(tag)
            # This needs a lookup to get the full attrs
            realtag, attrs = attrlookup[tag]
            open_element(realtag, attrs)

        # Recurse if it has any children, else the span is all text
        if children:
            _do_callback(s, left, right, children, cont_handler, attrlookup)
        else:
            emit_text(s[left:right])
        cursor = right

        if not special:
            close_element(tag)
        elif tag.startswith(">G"):
            realtag, attrs = attrlookup[tag]
            close_element(realtag)

    # anything after the last tag and before the end of the current
    # range are characters
    if cursor < end:
        emit_text(s[cursor:end])
177
def _do_dispatch_callback(s, begin, end, taglist,
                          start_table_get, cont_handler, save_stack,
                          end_table_get,
                          attrlookup):
    """internal function to convert the tagtable into ContentHandler events

    THIS IS A SPECIAL CASE FOR Dispatch.Dispatcher objects

    's' is the input text
    'begin' is the current position in the text
    'end' is 1 past the last position of the text allowed to be parsed
    'taglist' is the tag list from mxTextTools.parse
    'start_table_get' looks up the start function for a tag name and
        returns None if there is none (the Dispatcher's _start_table.get)
    'cont_handler' is the Dispatcher; text is appended to its _save_text
    'save_stack' is the Dispatcher's _save_stack; text is only collected
        while it is true
    'end_table_get' looks up the end function for a tag name and returns
        None if there is none (the Dispatcher's _end_table.get)
    'attrlookup' is a dict mapping the encoded tag name to the element info
    """
    cursor = begin
    for tag, left, right, children in taglist:
        # Text between the current position and the start of this tag
        # counts as characters.
        assert cursor <= left, "begin = %d and l = %d" % (cursor, left)
        if save_stack and cursor < left:
            cont_handler._save_text += s[cursor:left]

        # Normal tags, see if the start function exists and call it
        #  ** This is a bit of a hack, in that this check also occurs
        #  with special tags.  But those begin with a '>' so will
        #  always fail.  This makes the logic a bit simpler and
        #  faster than checking the '>G' and '>ignore' terms.
        #  However, it is possible that specially constructed
        #  handlers could mess things up.  That cannot happen by
        #  accident, so I won't worry about it.
        # Yes, this reaches into the implementation of the Dispatcher.
        start_func = start_table_get(tag)
        if start_func is None:
            # Tags with attributes
            info = attrlookup.get(tag)
            if info is not None:
                realtag, attrs = info
                # Does this function exist?
                start_func = start_table_get(realtag)
                if start_func is not None:
                    start_func(realtag, attrs)
        else:
            start_func(tag, _attribute_list)

        # Recurse if it has any children
        if children:
            _do_dispatch_callback(s, left, right, children,
                                  start_table_get,
                                  cont_handler, save_stack,
                                  end_table_get,
                                  attrlookup)
        elif save_stack:
            # Yes, this reaches into the implementation of the Dispatcher.
            cont_handler._save_text += s[left:right]
        cursor = right

        # See if there's a function for the normal tag
        end_func = end_table_get(tag)
        if end_func is None:
            # See if the special attribute tag exists
            info = attrlookup.get(tag)
            if info is not None:
                realtag, attrs = info
                # Yes, this reaches into the implementation of the Dispatcher.
                end_func = end_table_get(realtag)
                if end_func is not None:
                    end_func(realtag)
        else:
            end_func(tag)

    # anything after the last tag and before the end of the current
    # range are characters
    if save_stack and cursor < end:
        cont_handler._save_text += s[cursor:end]
255
def _parse_elements(s, tagtable, cont_handler, debug_level, attrlookup):
    """parse the string with the tagtable and send the ContentHandler events

    Specifically, it sends the startElement, endElement and characters
    events but not startDocument and endDocument.

    Returns None if the whole string was parsed, a
    ParserPositionException if the tagger reported failure, or the
    integer stop position if tagging succeeded but text remains.
    """
    if debug_level:
        import Generate
        Generate._position = 0

    text_length = len(s)
    matched, taglist, stop = TextTools.tag(s, tagtable, 0, text_length)

    # Special case test for the base ContentHandler since I know that
    # object does nothing and I want to test the method call overhead.
    if isinstance(cont_handler, Dispatch.Dispatcher):
        _do_dispatch_callback(s, 0, stop, taglist,
                              cont_handler._start_table.get,
                              cont_handler, cont_handler._save_stack,
                              cont_handler._end_table.get,
                              attrlookup)
    elif cont_handler.__class__ != handler.ContentHandler:
        # Send any tags to the client; this happens even when the
        # tagger reported failure.
        _do_callback(s, 0, stop, taglist, cont_handler, attrlookup)

    if not matched:
        # With debugging enabled the position reported is the one
        # recorded in Generate._position instead of the tagger's own.
        if debug_level:
            return ParserPositionException(Generate._position)
        return ParserPositionException(stop)
    if stop != text_length:
        return stop
    return None
289 290 # This needs an interface like the standard XML parser
class Parser(xmlreader.XMLReader):
    """Parse the input data all in memory"""

    def __init__(self, tagtable, options=None):
        """Create a parser for the given mxTextTools tag table.

        tagtable - the tag table; must be a tuple
        options - optional (want_groupref_names, debug_level, attrlookup)
            tuple, defaulting to (0, 1, {}) with a fresh dict for each
            parser.  It was formerly an unnamed tuple parameter, so
            existing positional callers are unaffected.
        """
        if options is None:
            # Build the default per instance so parsers never share one
            # mutable attrlookup dict.
            options = (0, 1, {})
        want_groupref_names, debug_level, attrlookup = options

        xmlreader.XMLReader.__init__(self)

        assert type(tagtable) == type(()), \
               "mxTextTools only allows a tuple tagtable"
        self.tagtable = tagtable

        # WARNING: This attribute is set directly by Generate - it bypasses
        # the value used in __init__.
        # Used to tell if the global "match_group" dict needs to be cleared.
        self.want_groupref_names = want_groupref_names

        self.debug_level = debug_level
        self.attrlookup = attrlookup

    def copy(self):
        """Return a new Parser sharing this one's settings and handlers."""
        parser = Parser(self.tagtable, (self.want_groupref_names,
                                        self.debug_level, self.attrlookup))
        parser.setContentHandler(self.getContentHandler())
        parser.setErrorHandler(self.getErrorHandler())
        parser.setDTDHandler(self.getDTDHandler())
        return parser

    def __str__(self):
        """Return the pretty-printed tag table."""
        x = StringIO()
        pprint.pprint(self.tagtable, x)
        return x.getvalue()

    def parseFile(self, fileobj):
        """parse using the input file object

        XXX will be removed with the switch to Python 2.0, where parse()
        takes an 'InputSource'
        """
        # Just parse as a string
        self.parseString(fileobj.read())

    def parse(self, source):
        """parse using the URL or file handle"""
        source = saxutils.prepare_input_source(source)
        self.parseFile(source.getCharacterStream() or source.getByteStream())

    def parseString(self, s):
        """parse using the given string

        XXX will be removed with the switch to Python 2.0, where parse()
        takes an 'InputSource'
        """
        self._cont_handler.startDocument()

        if self.want_groupref_names:
            _match_group.clear()

        # parse the text and send the SAX events
        result = _parse_elements(s, self.tagtable, self._cont_handler,
                                 self.debug_level, self.attrlookup)

        if result is None:
            # Successful parse
            pass

        elif isinstance(result, _exceptions.SAXException):
            # could not parse record, and wasn't EOF
            self._err_handler.fatalError(result)

        else:
            # Parsed a record, but extra text remains
            pos = result
            self._err_handler.fatalError(ParserIncompleteException(pos))

        # Send an endDocument event even after errors.  This is only
        # reached if the error handler returned instead of raising.
        self._cont_handler.endDocument()

    def close(self):
        pass
368
class RecordParser(xmlreader.XMLReader):
    """Parse the input data a record at a time"""

    def __init__(self, format_name, attrs, record_tagtable,
                 options,
                 make_reader, reader_args = ()):
        """parse the input data a record at a time

        format_name - XML tag name for the whole data file
        attrs - attributes sent with the format_name start element
        record_tagtable - mxTexTools tag table for each record
        options - (want_groupref_names, debug_level, attrlookup) tuple.
            It was formerly an unnamed tuple parameter, so existing
            positional callers are unaffected.
            want_groupref_names - flag to say if the match_group table
              needs to be reset (will disappear with better support from
              mxTextTools)

        make_reader - callable object which creates a RecordReader; first
          parameter will be an input file object
        reader_args - optional arguments to pass to make_reader after the
          input file object
        """
        want_groupref_names, debug_level, attrlookup = options

        xmlreader.XMLReader.__init__(self)

        self.format_name = format_name
        self.attrs = attrs
        assert type(record_tagtable) == type( () ), \
               "mxTextTools only allows a tuple tagtable"
        self.tagtable = record_tagtable
        self.want_groupref_names = want_groupref_names
        self.debug_level = debug_level
        self.attrlookup = attrlookup
        self.make_reader = make_reader
        self.reader_args = reader_args

    def copy(self):
        """Return a new RecordParser sharing these settings and handlers."""
        parser = RecordParser(self.format_name, self.attrs, self.tagtable,
                              (self.want_groupref_names, self.debug_level,
                               self.attrlookup),
                              self.make_reader, self.reader_args)
        parser.setContentHandler(self.getContentHandler())
        parser.setErrorHandler(self.getErrorHandler())
        parser.setDTDHandler(self.getDTDHandler())
        return parser

    def __str__(self):
        """Return the pretty-printed record tag table with a prefix."""
        x = StringIO()
        pprint.pprint(self.tagtable, x)
        return "parse records: " + x.getvalue()

    def _abort_with_current_exception(self):
        """Report the exception being handled as fatal and end the document.

        Must be called from inside an 'except' block.  The formatted
        traceback becomes the message of a ParserRecordException which
        also carries the original exception object.
        """
        outfile = StringIO()
        traceback.print_exc(file=outfile)
        self._err_handler.fatalError(ParserRecordException(
            outfile.getvalue(), sys.exc_info()[1]))
        self._cont_handler.endDocument()

    def parseFile(self, fileobj):
        """parse using the input file object

        XXX will be removed with the switch to Python 2.0, where parse()
        takes an 'InputSource'
        """
        self._cont_handler.startDocument()

        try:
            reader = self.make_reader( *(fileobj,) + self.reader_args)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            # something unexpected happened
            # so call it a fatal error and stop
            self._abort_with_current_exception()
            return

        if self.want_groupref_names:
            _match_group.clear()

        self._cont_handler.startElement(self.format_name, self.attrs)
        filepos = 0  # can get mixed up with DOS style "\r\n"
        while 1:
            try:
                record = reader.next()
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                # something unexpected happened (couldn't find a record?)
                # so call it a fatal error and stop
                self._abort_with_current_exception()
                return

            if record is None:
                break
            result = _parse_elements(record, self.tagtable, self._cont_handler,
                                     self.debug_level, self.attrlookup)

            if result is None:
                # Successfully read the record
                pass
            elif isinstance(result, _exceptions.SAXException):
                # Wrong format or a SAX problem, but this is recoverable
                result += filepos
                self._err_handler.error(result)
            else:
                # Did not reach end of string, but this is recoverable
                pos = filepos + result
                self._err_handler.error(ParserPositionException(pos))

            filepos = filepos + len(record)

        self._cont_handler.endElement(self.format_name)
        self._cont_handler.endDocument()

    def parse(self, source):
        """parse using the URL or file handle"""
        source = saxutils.prepare_input_source(source)
        self.parseFile(source.getCharacterStream() or source.getByteStream())

    def parseString(self, s):
        """parse using the given string

        XXX will be removed with the switch to Python 2.0, where parse()
        takes an 'InputSource'
        """
        # Just parse it as a file
        strfile = StringIO(s)
        self.parseFile(strfile)

    def close(self):
        pass
496
class HeaderFooterParser(xmlreader.XMLReader):
    """Header followed by 0 or more records followed by a footer"""

    def __init__(self, format_name, attrs,
                 make_header_reader, header_reader_args, header_tagtable,
                 make_reader, reader_args, record_tagtable,
                 make_footer_reader, footer_reader_args, footer_tagtable,
                 options):
        """Create a parser for header / records / footer style input.

        format_name - XML tag name for the whole data file
        attrs - attributes sent with the format_name start element
        make_header_reader, header_reader_args, header_tagtable -
            reader factory, its extra arguments and the tag table for the
            header; make_header_reader may be None for "no header"
        make_reader, reader_args, record_tagtable - the same for records
        make_footer_reader, footer_reader_args, footer_tagtable -
            the same for the footer; make_footer_reader may be None for
            "no footer"
        options - (want_groupref_names, debug_level, attrlookup) tuple.
            It was formerly an unnamed tuple parameter, so existing
            positional callers are unaffected.
        """
        want_groupref_names, debug_level, attrlookup = options

        xmlreader.XMLReader.__init__(self)

        self.format_name = format_name
        self.attrs = attrs

        self.make_header_reader = make_header_reader
        self.header_reader_args = header_reader_args
        self.header_tagtable = header_tagtable

        self.make_reader = make_reader
        self.reader_args = reader_args
        self.record_tagtable = record_tagtable

        self.make_footer_reader = make_footer_reader
        self.footer_reader_args = footer_reader_args
        self.footer_tagtable = footer_tagtable

        self.want_groupref_names = want_groupref_names
        self.debug_level = debug_level
        self.attrlookup = attrlookup

    def __str__(self):
        """Return the three pretty-printed tag tables with a prefix."""
        x = StringIO()
        pprint.pprint( (self.header_tagtable, self.record_tagtable,
                        self.footer_tagtable), x)
        return "header footer records: " + x.getvalue()

    def copy(self):
        """Return a new HeaderFooterParser sharing settings and handlers."""
        parser = HeaderFooterParser(self.format_name, self.attrs,
            self.make_header_reader, self.header_reader_args, self.header_tagtable,
            self.make_reader, self.reader_args, self.record_tagtable,
            self.make_footer_reader, self.footer_reader_args, self.footer_tagtable,
            (self.want_groupref_names, self.debug_level, self.attrlookup))

        parser.setContentHandler(self.getContentHandler())
        parser.setErrorHandler(self.getErrorHandler())
        parser.setDTDHandler(self.getDTDHandler())
        return parser

    def parseString(self, s):
        """parse using the given string"""
        strfile = StringIO(s)
        self.parseFile(strfile)

    def parse(self, source):
        """parse using the URL or file handle"""
        source = saxutils.prepare_input_source(source)
        self.parseFile(source.getCharacterStream() or source.getByteStream())

    def _current_record_exception(self):
        """Wrap the exception being handled in a ParserRecordException.

        Must be called from inside an 'except' block.  The formatted
        traceback becomes the message and the original exception object
        is attached.
        """
        outfile = StringIO()
        traceback.print_exc(file=outfile)
        return ParserRecordException(outfile.getvalue(), sys.exc_info()[1])

    def _finish_after_footer(self, footer_reader, footer, filepos):
        """Close the document after the footer has been parsed.

        The footer must be the last thing in the input.  If the footer
        reader still has text, or a byte can still be read from the
        stream it hands back, a fatal error is reported at the position
        just past the footer and the format element is left unclosed.
        """
        x, remainder = footer_reader.remainder()
        if remainder or x.read(1):
            # Acck, there's data left over
            record_exc = ParserPositionException(filepos +
                                                 len(footer))
            self._err_handler.fatalError(record_exc)
            self._cont_handler.endDocument()
            return
        # Success!
        self._cont_handler.endElement(self.format_name)
        self._cont_handler.endDocument()

    def parseFile(self, fileobj):
        """parse using the input file object"""
        self._cont_handler.startDocument()
        self._cont_handler.startElement(self.format_name, self.attrs)

        if self.want_groupref_names:
            _match_group.clear()

        # Read the header
        filepos = 0
        lookahead = ""
        if self.make_header_reader is not None:
            try:
                header_reader = self.make_header_reader(
                    *(fileobj,) + self.header_reader_args)
                header = header_reader.next()
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                # Something unexpected happened so call it a fatal error
                exc = self._current_record_exception()
                self._err_handler.fatalError(exc)
                self._cont_handler.endDocument()
                return

            # Parse the text (if any) and send the SAX events
            if header is None:
                header = ""
            filepos += len(header)

            result = _parse_elements(header, self.header_tagtable,
                                     self._cont_handler, self.debug_level,
                                     self.attrlookup)
            if result is None:
                # Successful parse
                pass
            elif isinstance(result, _exceptions.SAXException):
                # Could not parse header and wasn't EOF
                self._err_handler.fatalError(result)
                self._cont_handler.endDocument()
                return
            else:
                # Reached EOF
                pos = result
                self._err_handler.fatalError(ParserPositionException(pos))
                self._cont_handler.endDocument()
                return

        # We've successfully parsed the header, now parse the records

        # Get any forward data from the header reader
        if self.make_header_reader is None:
            x, lookahead = fileobj, ""
        else:
            x, lookahead = header_reader.remainder()

        if self.make_footer_reader is None:
            # Only records - no footer
            try:
                reader = self.make_reader( *(fileobj,) + self.reader_args,
                                           **{"lookahead": lookahead})
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                # Something unexpected happened so call it a fatal
                # error and stop
                exc = self._current_record_exception()
                self._err_handler.fatalError(exc)
                self._cont_handler.endDocument()
                return

            while 1:
                try:
                    record = reader.next()
                except (KeyboardInterrupt, SystemExit):
                    raise
                except:
                    # Something unexpected happened and I cannot recover
                    exc = self._current_record_exception()
                    self._err_handler.fatalError(exc)
                    self._cont_handler.endDocument()
                    return

                if record is None:
                    # Reached EOF, so that's it (since there's no footer)
                    self._cont_handler.endElement(self.format_name)
                    self._cont_handler.endDocument()
                    return

                result = _parse_elements(record, self.record_tagtable,
                                         self._cont_handler, self.debug_level,
                                         self.attrlookup)
                if result is None:
                    # Successfully parsed the record
                    pass
                else:
                    # Failed to parse the record, but can recover
                    if isinstance(result, _exceptions.SAXException):
                        result += filepos
                    else:
                        result = ParserPositionException(filepos + result)
                    self._err_handler.error(result)

                filepos += len(record)

        assert self.make_footer_reader is not None, "internal error"

        # This gets to be quite complicated :(

        # If the record fails, try the footer.  If that fails,
        # skip the record and try again
        record_exc = None
        try:
            reader = self.make_reader( *(fileobj,) + self.reader_args,
                                       **{"lookahead": lookahead})
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            # Something unexpected happened - could be that there was
            # no record and only a footer?  Save the current exception.
            record_exc = self._current_record_exception()

        while record_exc is None:
            try:
                record = reader.next()
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                # Something unexpected happened.  Could be the footer,
                # but save the current exception in case it isn't
                record_exc = self._current_record_exception()
                break

            if record is None:
                # Reached EOF, but there should have been a footer
                record_exc = ParserPositionException(filepos)
                break

            result = _parse_elements(record, self.record_tagtable,
                                     self._cont_handler, self.debug_level,
                                     self.attrlookup)
            if result is None:
                # Successfully parsed the record
                pass
            else:
                # Failed to parse the record, but may recover if it
                # isn't the footer
                if isinstance(result, _exceptions.SAXException):
                    result += filepos
                else:
                    result = ParserPositionException(filepos + result)
                record_exc = result

                # Is there a valid footer?
                try:
                    footer = ""
                    x, lookahead = reader.remainder()
                    footer_reader = self.make_footer_reader(
                        *(fileobj,) + self.footer_reader_args,
                        **{"lookahead": record + lookahead})
                    footer = footer_reader.next()
                except (KeyboardInterrupt, SystemExit):
                    raise
                except:
                    # Not a footer either, so call this an error and
                    # attempt the next record
                    self._err_handler.error(record_exc)
                    record_exc = None

                    # But that means I need to reset the record reader(!)
                    x, lookahead = footer_reader.remainder()
                    try:
                        reader = self.make_reader(
                            *(fileobj,) + self.reader_args,
                            **{"lookahead": footer + lookahead})
                    except (KeyboardInterrupt, SystemExit):
                        raise
                    except:
                        # Something unexpected happened.  Save the
                        # current exception and stop reading
                        record_exc = self._current_record_exception()
                        break
                    # NOTE(review): there is no 'continue' here, so control
                    # falls through to the footer parse below with
                    # record_exc already reset to None - confirm whether
                    # that is intended.

                # Hmm, okay, it was a valid footer, but can it be
                # parsed?
                result = _parse_elements(footer, self.footer_tagtable,
                                         self._cont_handler, self.debug_level,
                                         self.attrlookup)

                if result is None:
                    # parsed the footer, but need to check that it's
                    # at EOF
                    self._finish_after_footer(footer_reader, footer, filepos)
                    return
                else:
                    # Wasn't a footer, so reset the reader stream and skip
                    # past the record which I know I can read.
                    x, remainder = footer_reader.remainder()
                    reader = self.make_reader(
                        *(fileobj, ) + self.reader_args,
                        **{"lookahead": footer + remainder})
                    record = reader.next()
                    self._err_handler.error(record_exc)
                    record_exc = None

            filepos += len(record)

        # Could not read a record or reached EOF.  Try to parse the
        # trailer
        x, remainder = reader.remainder()
        try:
            footer_reader = self.make_footer_reader(
                *(fileobj,) + self.footer_reader_args,
                **{"lookahead": remainder})
            footer = footer_reader.next()
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            # Cannot read the record, so use the older error
            self._err_handler.fatalError(record_exc)
            self._cont_handler.endDocument()
            return

        if footer is None:
            footer = ""
        result = _parse_elements(footer, self.footer_tagtable,
                                 self._cont_handler, self.debug_level,
                                 self.attrlookup)
        if result is None:
            # parsed the footer, but need to check that it's
            # at EOF
            self._finish_after_footer(footer_reader, footer, filepos)
            return
        else:
            # Okay, use the old error
            self._err_handler.fatalError(record_exc)
            self._cont_handler.endDocument()
            return
830