1
2
3
4
5
6 """Martel based parser to read MetaTool output files.
7
This is a huge regular expression for MetaTool 3.5 output, built using
the 'regular expressions on steroids' capabilities of Martel.
10
11 http://www2.bioinf.mdc-berlin.de/metabolic/metatool/
12
13
14 This helps us have endlines be consistent across platforms.
15
16 """
17
18
19 from Martel import Opt, Alt, Digits, Integer, Group, Str, MaxRepeat
20 from Martel import Any, AnyBut, RepN, Rep, Rep1, ToEol, AnyEol
21 from Martel import Expression
22 from Martel import RecordReader
23
# Lexical building blocks shared by the line patterns defined in this module.
blank = " "
tab = "\t"

# Runs of blanks/tabs: a bounded run of 1..80, and a zero-or-more run.
blank_space = MaxRepeat(Any(blank + tab), 1, 80)
optional_blank_space = Rep(Any(blank + tab))

# Plain character sets, meant to be handed to Any() / AnyBut().
# white_space is blank, tab, line feed (10) and carriage return (13).
white_space = " \t\n\r"
digits = "0123456789"

# A line holding nothing but optional blanks/tabs before its line ending.
blank_line = optional_blank_space + AnyEol()

# A single lower-case ASCII letter, reported as "lower_case_letter".
lower_case_letter = Group("lower_case_letter",
                          Any("abcdefghijklmnopqrstuvwxyz"))
32
# Rows shaped like "<digits> : <rest of line>".  The enzyme and reaction rows
# use the same pattern and differ only in the group name they are tagged with.
enzyme = Group(
    "enzyme",
    optional_blank_space + Digits() + optional_blank_space + Str(":")
    + ToEol())
reaction = Group(
    "reaction",
    optional_blank_space + Digits() + optional_blank_space + Str(":")
    + ToEol())

# The literal "- not found -" row, optionally indented.
not_found_line = Group(
    "not_found_line",
    optional_blank_space + Str("- not found -") + ToEol())
39
# "enzymes" section: a header line, optional blank lines, then either one or
# more enzyme rows or the single "- not found -" row.
enzymes_header = Group(
    "enzymes_header",
    optional_blank_space + Str("enzymes") + ToEol())
enzymes_list = Group(
    "enzymes_list",
    Alt(Rep1(enzyme), not_found_line))
enzymes_block = Group(
    "enzymes_block",
    enzymes_header + Rep(blank_line) + enzymes_list)
46
# "overall reaction" section: a header line, optional blank lines, then either
# one or more reaction rows or the single "- not found -" row.
reactions_header = Group(
    "reactions_header",
    optional_blank_space + Str("overall reaction") + ToEol())
reactions_list = Group(
    "reactions_list",
    Alt(Rep1(reaction), not_found_line))
reactions_block = Group(
    "reactions_block",
    reactions_header + Rep(blank_line) + reactions_list)
53
# Version string: "<major>.<minor>" with an optional lower-case revision
# letter directly after the minor number.
rev = Group("rev", Opt(lower_case_letter))
version = Group(
    "version",
    Digits("version_major") + Any(".") + Digits("version_minor") + rev)

# Banner line: "METATOOL OUTPUT", blanks, "Version", blanks, the version,
# then the rest of the line.
metatool_tag = Str("METATOOL OUTPUT")
metatool_line = Group(
    "metatool_line",
    metatool_tag + blank_space + Str("Version") + blank_space + version
    + ToEol())
60
# "INPUT FILE:" line; the remainder of the line is captured as
# "input_file_name".
input_file_tag = Str("INPUT FILE:")
input_file_line = Group(
    "input_file_line",
    input_file_tag + blank_space + ToEol("input_file_name"))

# "INTERNAL METABOLITES:" line; the count is captured as
# "num_int_metabolites".
metabolite_count_tag = Str("INTERNAL METABOLITES:")
metabolite_count_line = Group(
    "metabolite_count_line",
    metabolite_count_tag + blank_space + Digits("num_int_metabolites")
    + ToEol())

# "REACTIONS:" line; the count is captured as "num_reactions".
reaction_count_tag = Str("REACTIONS:")
reaction_count_line = Group(
    "reaction_count_line",
    reaction_count_tag + blank_space + Digits("num_reactions") + ToEol())
72
# Metabolite kind as written in the listing: "int" or "external".
type_metabolite = Group(
    "type_metabolite",
    Alt(Str("int"), Str("external")))

# One metabolite entry: a number, the kind, then a run of non-whitespace
# characters.
metabolite_info = Group(
    "metabolite_info",
    optional_blank_space + Digits() + blank_space + type_metabolite
    + blank_space + Rep1(AnyBut(white_space)))
metabolite_line = Group("metabolite_line", metabolite_info + ToEol())

# Closing "<digits> metabolites" line of the listing.
metabolites_summary = Group(
    "metabolites_summary",
    optional_blank_space + Digits() + blank_space + Str("metabolites")
    + ToEol())

# The whole listing: one or more entries, the summary, trailing blank lines.
metabolites_block = Group(
    "metabolites_block",
    Rep1(metabolite_line) + metabolites_summary + Rep(blank_line))
84
# Column heading of the graph-structure table:
# "edges", blanks, "frequency of nodes".
graph_structure_heading = Group(
    "graph_structure_heading",
    optional_blank_space + Str("edges") + blank_space
    + Str("frequency of nodes") + ToEol())

# One table row: two numbers captured as "edge_count" and "num_nodes".
graph_structure_line = Group(
    "graph_structure_line",
    optional_blank_space + Digits("edge_count") + blank_space
    + Digits("num_nodes") + ToEol())

# Heading, optional blank lines, at least one row, optional blank lines.
graph_structure_block = Group(
    "graph_structure_block",
    graph_structure_heading + Rep(blank_line)
    + Rep1(graph_structure_line) + Rep(blank_line))
92
# Zero or more further terms of a sum, each written as " + <token>" where the
# token is a run of non-whitespace characters.
_sum_extra_terms = Rep(
    blank_space + Any("+") + blank_space + Rep1(AnyBut(white_space)))

# One row of the form "<digits> : <token> + <token> ... =" followed by the
# rest of the line.
sum_is_constant_line = Group(
    "sum_is_constant_line",
    optional_blank_space + Digits() + optional_blank_space + Any(":")
    + optional_blank_space + Rep1(AnyBut(white_space)) + _sum_extra_terms
    + optional_blank_space + Str("=") + ToEol())

# Any number of such rows (possibly none).
sum_is_constant_block = Group(
    "sum_is_constant_block",
    Rep(sum_is_constant_line))
99
100
# Section title lines.  Each title literal is wrapped in its own "*_tag" group
# so it can be referenced on its own elsewhere in the module; the "*_line"
# group adds the rest of the line.
stoichiometric_tag = Group(
    "stoichiometric_tag",
    Str("STOICHIOMETRIC MATRIX"))
stoichiometric_line = Group(
    "stoichiometric_line",
    stoichiometric_tag + ToEol())

not_balanced_tag = Group(
    "not_balanced_tag",
    Str("NOT BALANCED INTERNAL METABOLITES"))
not_balanced_line = Group(
    "not_balanced_line",
    not_balanced_tag + ToEol())

subsets_tag = Group(
    "subsets_tag",
    Str("SUBSETS OF REACTIONS"))
subsets_line = Group(
    "subsets_line",
    subsets_tag + ToEol())
112
# "REDUCED SYSTEM" title line.  After the title come two numbers, each
# preceded by a run of non-digit characters; the first number is captured as
# "branch_points", the second is matched but not named.
reduced_system_tag = Group(
    "reduced_system_tag",
    Str("REDUCED SYSTEM"))
reduced_system_line = Group(
    "reduced_system_line",
    reduced_system_tag
    + Rep1(AnyBut(digits)) + Digits("branch_points")
    + Rep1(AnyBut(digits)) + Digits()
    + ToEol())
117
# Remaining section title lines, each as a "*_tag" group for the literal title
# plus a "*_line" group that also takes the rest of the line.
kernel_tag = Group("kernel_tag", Str("KERNEL"))
kernel_line = Group("kernel_line", kernel_tag + ToEol())

convex_basis_tag = Group("convex_basis_tag", Str("CONVEX BASIS"))
convex_basis_line = Group("convex_basis_line", convex_basis_tag + ToEol())

conservation_relations_tag = Group(
    "conservation_relations_tag",
    Str("CONSERVATION RELATIONS"))
conservation_relations_line = Group(
    "conservation_relations_line",
    conservation_relations_tag + ToEol())

elementary_modes_tag = Group(
    "elementary_modes_tag",
    Str("ELEMENTARY MODES"))
elementary_modes_line = Group(
    "elementary_modes_line",
    elementary_modes_tag + ToEol())
134
# Matrix dimensions as they appear in the header, e.g. "r<rows> x c<cols>".
num_rows = Group("num_rows", Digits())
num_cols = Group("num_cols", Digits())
matrix_header = Group(
    "matrix_header",
    optional_blank_space + Str("matrix dimension") + blank_space
    + Any("r") + num_rows + blank_space + Any("x") + blank_space
    + Any("c") + num_cols + optional_blank_space + AnyEol())

# A single integer cell of the matrix.
matrix_element = Group("matrix_element", Integer())

# The repeat bounds below are the group names "num_cols" / "num_rows" rather
# than numbers.
# NOTE(review): presumably Martel resolves these to the values matched by the
# header groups of the same name -- confirm against the Martel documentation.
matrix_row = Group(
    "matrix_row",
    MaxRepeat(optional_blank_space + matrix_element, "num_cols", "num_cols")
    + ToEol())
matrix = Group(
    "matrix",
    MaxRepeat(matrix_row, "num_rows", "num_rows"))

# Header followed by the matrix body.
matrix_block = Group("matrix_block", matrix_header + matrix)

# A single row of cells, each preceded by at least one blank/tab, using the
# same "num_cols" repeat bound as a matrix row.
irreversible_vector = Group(
    "irreversible_vector",
    MaxRepeat(blank_space + matrix_element, "num_cols", "num_cols")
    + ToEol())
150
# Separators: a single space may occur inside one entry (little_gap), while a
# tab or a run of 2..80 spaces separates entries (big_gap).
little_gap = Str(" ")
big_gap = Alt(Str("\t"), MaxRepeat(Str(" "), 2, 80))

# One entry: a non-whitespace token, optionally followed by a single space and
# a second token.
unbalanced_metabolite = Group(
    "unbalanced_metabolite",
    Rep1(AnyBut(white_space))
    + Opt(little_gap + Rep1(AnyBut(white_space))))

# A line of one or more entries separated by big gaps.
not_balanced_data = Group(
    "not_balanced_data",
    optional_blank_space + unbalanced_metabolite
    + Rep(big_gap + unbalanced_metabolite) + ToEol())
158
# Heading line that starts with "->", and the column-title line
# "met cons built reactions" that follows it.
metabolite_roles_heading = Group(
    "metabolite_roles_heading",
    Str("->") + ToEol())
metabolite_role_cols = Group(
    "metabolite_role_cols",
    optional_blank_space + Str("met") + blank_space + Str("cons")
    + blank_space + Str("built") + blank_space + Str("reactions") + ToEol())


def _metabolite_role_row(group_name):
    """Build the pattern for one metabolite row, tagged as *group_name*.

    The row is a non-whitespace token, three blank-separated numbers, one or
    more of the letters "i"/"r", then the rest of the line.  A fresh
    expression is built on every call.
    """
    return Group(
        group_name,
        optional_blank_space + Rep1(AnyBut(white_space)) + blank_space
        + RepN(Digits() + blank_space, 3) + Rep1(Any("ir")) + ToEol())


# Both row kinds share one shape; only the group name differs.
branch_metabolite = _metabolite_role_row("branch_metabolite")
non_branch_metabolite = _metabolite_role_row("non_branch_metabolite")

# Each table is heading + column titles + zero or more rows.
branch_metabolite_block = Group(
    "branch_metabolite_block",
    metabolite_roles_heading + metabolite_role_cols
    + Rep(branch_metabolite))
non_branch_metabolite_block = Group(
    "non_branch_metabolite_block",
    metabolite_roles_heading + metabolite_role_cols
    + Rep(non_branch_metabolite))
177
def _lines_not_starting_with(group_name, first_tag, *other_tags):
    """Build a group of zero or more lines guarded by inverted assertions.

    Each repeated line is preceded by Expression.Assert(tag, 1) for every
    given tag, in the order given, and then consumed with ToEol().
    NOTE(review): the second argument 1 is taken to invert the assertion
    (i.e. "the tag does not match here") -- confirm against Martel docs.
    """
    guard = Expression.Assert(first_tag, 1)
    for tag in other_tags:
        guard = guard + Expression.Assert(tag, 1)
    return Group(group_name, Rep(guard + ToEol()))


# Trailing lines of each section, up to the title of a following section.
# The group name "end_stochiometric" (sic) is kept exactly as it was: it is a
# runtime tag name that code outside this module may depend on.
end_stoichiometric = _lines_not_starting_with(
    "end_stochiometric", not_balanced_tag, kernel_tag)
end_not_balanced = _lines_not_starting_with(
    "end_not_balanced", kernel_tag)
end_kernel = _lines_not_starting_with(
    "end_kernel", subsets_tag)
end_subsets = _lines_not_starting_with(
    "end_subsets", reduced_system_tag)
end_reduced_system = _lines_not_starting_with(
    "end_reduced_system", convex_basis_tag)
end_convex_basis = _lines_not_starting_with(
    "end_convex_basis", conservation_relations_tag)
end_conservation_relations = _lines_not_starting_with(
    "end_conservation_relations", elementary_modes_tag)

# The last section has no following title to stop at: take every line.
end_elementary_modes = Group("end_elementary_modes", Rep(ToEol()))
194
195
# Header-level blocks: each is its key line followed by blank lines.
input_file_block = Group(
    "input_file_block",
    input_file_line + Rep(blank_line))

# Unlike the other blocks, the banner needs at least one blank line after it.
metatool_block = Group(
    "metatool_block",
    metatool_line + Rep1(blank_line))

metabolite_count_block = Group(
    "metabolite_count_block",
    metabolite_count_line + Rep(blank_line))

# The reaction-count block also contains the metabolite listing and the
# graph-structure table, each separated by optional blank lines.
reaction_count_block = Group(
    "reaction_count_block",
    reaction_count_line + Rep(blank_line)
    + metabolites_block + Rep(blank_line)
    + graph_structure_block + Rep(blank_line))
# Stoichiometric matrix section: title, matrix, one further line, the
# irreversible vector, then any trailing lines of the section.
stoichiometric_block = Group(
    "stoichiometric_block",
    stoichiometric_line + Rep(blank_line)
    + matrix_block + ToEol()
    + irreversible_vector
    + end_stoichiometric)

# Section listing the not-balanced internal metabolites: title, one data
# line, with optional blank lines after each.
not_balanced_block = Group(
    "not_balanced_block",
    not_balanced_line + Rep(blank_line)
    + not_balanced_data + Rep(blank_line))
# Kernel and subsets sections have the same layout: title, matrix, one
# further line, the enzymes list, the reactions list, then trailing lines.
kernel_block = Group(
    "kernel_block",
    kernel_line + Rep(blank_line)
    + matrix_block + ToEol() + Rep(blank_line)
    + enzymes_block + Rep(blank_line)
    + reactions_block
    + end_kernel)
subsets_block = Group(
    "subsets_block",
    subsets_line + Rep(blank_line)
    + matrix_block + ToEol() + Rep(blank_line)
    + enzymes_block + Rep(blank_line)
    + reactions_block
    + end_subsets)
# Reduced system section: title, matrix, one further line, the irreversible
# vector, the branch and non-branch metabolite tables, then trailing lines.
reduced_system_block = Group(
    "reduced_system_block",
    reduced_system_line + Rep(blank_line)
    + matrix_block + ToEol()
    + irreversible_vector + Rep(blank_line)
    + branch_metabolite_block + Rep(blank_line)
    + non_branch_metabolite_block
    + end_reduced_system)
# Convex basis section: title, matrix, an optional further line, the enzymes
# list, the reactions list, then trailing lines.
convex_basis_block = Group(
    "convex_basis_block",
    convex_basis_line + Rep(blank_line)
    + matrix_block + Opt(ToEol()) + Rep(blank_line)
    + enzymes_block + Rep(blank_line)
    + reactions_block
    + end_convex_basis)

# Conservation relations section: title, matrix, the "sum is constant" rows,
# then trailing lines.
conservation_relations_block = Group(
    "conservation_relations_block",
    conservation_relations_line + Rep(blank_line)
    + matrix_block + Rep(blank_line)
    + sum_is_constant_block
    + end_conservation_relations)

# Elementary modes section: same layout as the convex basis section, and it
# takes every remaining line at the end.
elementary_modes_block = Group(
    "elementary_modes_block",
    elementary_modes_line + Rep(blank_line)
    + matrix_block + Opt(ToEol()) + Rep(blank_line)
    + enzymes_block + Rep(blank_line)
    + reactions_block
    + end_elementary_modes)
230
231
# The complete record: all sections in their fixed order.  Only the
# not-balanced section is optional.
metatool_record = Group(
    "metatool_record",
    metatool_block
    + input_file_block
    + metabolite_count_block
    + reaction_count_block
    + stoichiometric_block
    + Opt(not_balanced_block)
    + kernel_block
    + subsets_block
    + reduced_system_block
    + convex_basis_block
    + conservation_relations_block
    + elementary_modes_block)
237