1
2
3
4
5 """Parser for the MASE/Intelligenetics alignment file format.
6
7 http://pbil.univ-lyon1.fr/help/formats.html
8 """
9
10
11 import string
12 import array
13 import os
14 import re
15 import sgmllib
16 import urlparse
17
18
19 from xml.sax import handler
20
21
22 import Martel
23 from Martel import RecordReader
24
25 from Bio.ParserSupport import EventGenerator
26 from Bio.ParserSupport import AbstractConsumer
27 from Bio import File
28 import intelligenetics_format
29 import Record
31 """Iterator interface to move over a file of IntelliGenetics entries one at a time.
32 """
def __init__(self, handle, parser = None):
    """Set up iteration over the IntelliGenetics entries in a handle.

    Arguments:
    o handle - A handle with IntelliGenetics entries to iterate through.
    o parser - An optional parser that each entry is passed through
    before it is returned. If None, the raw entry is returned instead.
    """
    # The reader is built on the wrapped handle, not on the handle that
    # was passed in, so both attributes refer to the same UndoHandle.
    wrapped_handle = File.UndoHandle(handle)
    self.handle = wrapped_handle
    self._reader = IntelliGeneticsReader(wrapped_handle)
    self._parser = parser
44
def next(self):
    """Return the next IntelliGenetics record from the handle.

    The raw entry is pulled from the underlying reader. If a parser was
    given at construction time, the entry is wrapped in a
    File.StringHandle and the result of parser.parse() is returned;
    otherwise the raw entry itself is returned.

    Will return None if we ran out of records.
    """
    data = self._reader.next()

    # An empty read means there are no more entries. Return None
    # explicitly (rather than passing the empty value through) so that
    # the documented contract holds and iter(self.next, None) terminates.
    if not data:
        return None

    if self._parser is not None:
        # The entry goes straight to the parser; no copy of it is
        # written to disk along the way.
        return self._parser.parse(File.StringHandle(data))

    return data
60
62 return iter(self.next, None)
63
65 """Start up Martel to do the scanning of the file.
66
67 This initializes the Martel based parser and connects it to a handler
68 that will generate events for a Feature Consumer.
69 """
71 """Initialize the scanner by setting up our caches.
72
73 Creating the parser takes a long time, so we want to cache it
74 to reduce parsing time.
75
76 Arguments:
77 o debug - The level of debugging that the parser should
78 display. Level 0 is no debugging, Level 2 displays the most
79 debugging info (but is much slower). See Martel documentation
80 for more info on this.
81 """
82
83
84 self.interest_tags = ["comment", "title_line", "sequence" ]
85
86
87 expression = Martel.select_names(intelligenetics_format.intelligenetics_record, self.interest_tags)
88 self._parser = expression.make_parser(debug_level = debug)
89
def feed(self, handle, consumer):
    """Feed a set of data into the scanner.

    Arguments:
    o handle - A handle with the information to parse.
    o consumer - The consumer that should be informed of events.
    """
    martel_parser = self._parser

    # Events are generated for the tags listed in self.interest_tags and
    # delivered to the consumer through an EventGenerator content handler.
    event_handler = EventGenerator(consumer, self.interest_tags)
    martel_parser.setContentHandler(event_handler)

    martel_parser.parseFile(handle)
102
104 """Create an IntelliGenetics Record object from scanner generated information.
105 """
108
109
112
116
120
122 """Parse IntelliGenetics files into Record objects
123 """
125 """Initialize the parser.
126
127 Arguments:
128 o debug_level - An optional argument that specifies the amount of
129 debugging information Martel should spit out. By default we have
130 no debugging info (the fastest way to do things), but if you want
131 you can set this as high as two and see exactly where a parse fails.
132 """
133 self._scanner = _Scanner(debug_level)
134
def parse(self, handle):
    """Parse the specified handle into an IntelliGenetics record.

    A new _RecordConsumer is created for every call and kept on
    self._consumer; the scanner feeds it from the handle and the
    consumer's data attribute is returned.
    """
    consumer = _RecordConsumer()
    self._consumer = consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
141
143
146
def next(self):
    """Return the text of the next entry, or '' when the input is exhausted.

    An entry is collected as any leading lines that start with ';'
    followed by the lines after them that do not. Reading stops at end of
    input, or at the first ';' line met after a non-';' line has been
    seen; that line is handed back to the handle with saveline() so the
    following call starts with it.
    """
    infile = self.infile
    collected = []
    # False while only ';' lines have been read for this entry, True once
    # the first line not starting with ';' has been consumed.
    in_sequence = False
    while True:
        line = infile.readline()
        if line == '':
            # End of input: return whatever was gathered (possibly nothing).
            break
        is_comment = line[0] == ';'
        if is_comment and in_sequence:
            # This line opens the next entry; push it back and stop.
            infile.saveline(line)
            break
        if not is_comment:
            in_sequence = True
        collected.append(line)
    # Join once at the end instead of growing a string line by line.
    return ''.join(collected)
169