1
2
3
4 """Implement Martel parsers.
5
6 The classes in this module are used by other Martel modules and not
7 typically by external users.
8
9 There are two major parsers, 'Parser' and 'RecordParser.' The first
10 is the standard one, which parses the file as one string in memory
11 then generates the SAX events. The other reads a record at a time
12 using a RecordReader and generates events after each read. The
13 generated event callbacks are identical.
14
15 At some level, both parsers use "_do_callback" to convert mxTextTools
16 tags into SAX events.
17
18 XXX finish this documentation
19
20 XXX need a better way to get closer to the likely error position when
21 parsing.
22
23 XXX need to implement Locator
24
25 """
26 import urllib, traceback, sys
27 from xml.sax import handler, saxutils
28 import Parser, RecordReader
29
30 try:
31 from cStringIO import StringIO
32 except ImportError:
33 from StringIO import StringIO
34
35
def __init__(self, record_parser, make_reader, reader_args, marker_tag):
    """Remember the pieces needed to iterate over records.

    record_parser -- parser applied to each record string; its copy()
        result is used when a new IterRecords is built from this one
    make_reader -- callable invoked as make_reader(fileobj, *reader_args)
        to create the record reader
    reader_args -- sequence of extra positional arguments for make_reader
    marker_tag -- stored unchanged and passed on when a new IterRecords
        is built; not otherwise used in the code visible here
    """
    # Plain attribute stores, performed left to right in the same order
    # as four separate assignment statements would be.
    (self.record_parser,
     self.make_reader,
     self.reader_args,
     self.marker_tag) = (record_parser, make_reader, reader_args, marker_tag)
42
44 return IterRecords(self.record_parser.copy(),
45 self.make_reader,
46 self.reader_args,
47 self.marker_tag)
48
49 - def iterate(self, source, cont_handler = None):
53
56
58 self.start_position = 0
59 if cont_handler is None:
60 import LAX
61 cont_handler = LAX.LAX()
62 self.record_parser.setContentHandler(cont_handler)
63
64 reader = self.make_reader(fileobj, *self.reader_args)
65 while 1:
66 try:
67 rec = reader.next()
68 except RecordReader.ReaderError:
69 raise Parser.ParserPositionException(self.start_position)
70 if rec is None:
71 break
72 self.end_position = self.start_position + len(rec)
73 try:
74 self.record_parser.parseString(rec)
75 except Parser.ParserPositionException, exc:
76 exc += self.start_position
77 raise
78
79 yield cont_handler
80 self.start_position = self.end_position
81
82 fileobj, lookahead = reader.remainder()
83 if lookahead or fileobj.read(1):
84 raise Parser.ParserPositionException(self.start_position)
85
86
223