Package Bio :: Package MetaTool :: Module metatool_format
[hide private]
[frames] | no frames]

Source Code for Module Bio.MetaTool.metatool_format

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Martel based parser to read MetaTool output files. 
  7   
  8  This is a huge regular expression for MetaTool 3.5 output, built using
  9  the 'regular expressions on steroids' capabilities of Martel.
 10   
 11  http://www2.bioinf.mdc-berlin.de/metabolic/metatool/ 
 12   
 13   
 14  This helps us have endlines be consistent across platforms. 
 15   
 16  """ 
 17   
 18  # Martel 
 19  from Martel import Opt, Alt, Digits, Integer, Group, Str, MaxRepeat 
 20  from Martel import Any, AnyBut, RepN, Rep, Rep1, ToEol, AnyEol 
 21  from Martel import Expression 
 22  from Martel import RecordReader 
 23   
 24  blank = ' ' 
 25  tab = '\t' 
 26  blank_space = MaxRepeat( Any( blank + tab), 1, 80 ) 
 27  optional_blank_space = Rep( Any( blank + tab ) ) 
 28  white_space = " \t" + chr( 10 ) + chr( 13 ) 
 29  blank_line = optional_blank_space + AnyEol() 
 30  lower_case_letter = Group( "lower_case_letter", Any( "abcdefghijklmnopqrstuvwxyz" ) ) 
 31  digits = "0123456789" 
 32   
 33  enzyme = Group( "enzyme", optional_blank_space + Digits() + 
 34      optional_blank_space + Str( ':' ) + ToEol() ) 
 35  reaction = Group( "reaction", optional_blank_space + Digits() + 
 36      optional_blank_space + Str( ":" ) + ToEol() ) 
 37  not_found_line = Group( "not_found_line", optional_blank_space + Str( "- not found -" ) + 
 38      ToEol() ) 
 39   
 40  enzymes_header = Group( "enzymes_header", optional_blank_space + Str( "enzymes" ) + 
 41       ToEol() ) 
 42  enzymes_list = Group( "enzymes_list", Alt( Rep1( enzyme ), \ 
 43      not_found_line ) ) 
 44  enzymes_block = Group( "enzymes_block", enzymes_header + Rep( blank_line ) + 
 45      enzymes_list ) 
 46   
 47  reactions_header = Group( "reactions_header", optional_blank_space + 
 48      Str( "overall reaction" ) + ToEol() ) 
 49  reactions_list = Group( "reactions_list", Alt( Rep1( reaction ), \ 
 50      not_found_line ) ) 
 51  reactions_block = Group( "reactions_block", reactions_header + Rep( blank_line ) + 
 52      reactions_list ) 
 53   
 54  rev = Group( "rev", Opt( lower_case_letter ) ) 
 55  version = Group( "version", Digits( "version_major") + Any( "." ) + 
 56      Digits( "version_minor") + rev ) 
 57  metatool_tag = Str( "METATOOL OUTPUT" ) 
 58  metatool_line = Group( "metatool_line", metatool_tag + blank_space + 
 59      Str( "Version" ) + blank_space + version + ToEol() ) 
 60   
 61  input_file_tag = Str( "INPUT FILE:" ) 
 62  input_file_line = Group( "input_file_line", input_file_tag + blank_space + 
 63      ToEol( "input_file_name" ) ) 
 64   
 65  metabolite_count_tag = Str( "INTERNAL METABOLITES:" ) 
 66  metabolite_count_line = Group( "metabolite_count_line",  metabolite_count_tag + 
 67      blank_space + Digits( "num_int_metabolites" ) + ToEol() ) 
 68   
 69  reaction_count_tag = Str( "REACTIONS:" ) 
 70  reaction_count_line = Group( "reaction_count_line", reaction_count_tag + blank_space + 
 71      Digits( "num_reactions" ) + ToEol() ) 
 72   
 73  type_metabolite = Group( "type_metabolite", Alt( Str( "int" ), \ 
 74      Str( "external" ) ) ) 
 75  metabolite_info = Group( "metabolite_info", optional_blank_space + 
 76      Digits() + blank_space + type_metabolite + blank_space + 
 77  #    Integer() + blank_space + Rep1( lower_case_letter ) + 
 78      Rep1( AnyBut( white_space ) ) ) 
 79  metabolite_line = Group( "metabolite_line", metabolite_info + ToEol() ) 
 80  metabolites_summary = Group( "metabolites_summary", optional_blank_space + Digits() + 
 81      blank_space + Str( "metabolites" ) + ToEol() ) 
 82  metabolites_block = Group( "metabolites_block", Rep1( metabolite_line ) + 
 83      metabolites_summary + Rep( blank_line ) ) 
 84   
 85  graph_structure_heading = Group( "graph_structure_heading", optional_blank_space + 
 86      Str( "edges" ) + blank_space + Str( "frequency of nodes" ) + ToEol() ) 
 87  graph_structure_line = Group( "graph_structure_line", optional_blank_space + 
 88      Digits( "edge_count" ) + blank_space + Digits( "num_nodes" ) + ToEol() ) 
 89  graph_structure_block =  Group( "graph_structure_block", \ 
 90      graph_structure_heading + Rep( blank_line ) + 
 91      Rep1( graph_structure_line ) + Rep( blank_line ) ) 
 92   
 93  sum_is_constant_line = Group( "sum_is_constant_line", optional_blank_space + 
 94      Digits() + optional_blank_space + Any( ":" ) + optional_blank_space + 
 95      Rep1( AnyBut( white_space ) ) + 
 96      Rep( blank_space + Any( "+" ) + blank_space + Rep1( AnyBut( white_space ) ) ) + 
 97      optional_blank_space + Str( "=" ) + ToEol() ) 
 98  sum_is_constant_block = Group( "sum_is_constant_block", Rep( sum_is_constant_line ) ) 
 99   
100   
# Section heading tags and the lines that carry them.  Each *_tag is a named
# group around a literal string; each *_line is that tag followed by the
# remainder of the line.

stoichiometric_tag = Group("stoichiometric_tag",
                           Str("STOICHIOMETRIC MATRIX"))
stoichiometric_line = Group("stoichiometric_line",
                            stoichiometric_tag + ToEol())

not_balanced_tag = Group("not_balanced_tag",
                         Str("NOT BALANCED INTERNAL METABOLITES"))
not_balanced_line = Group("not_balanced_line",
                          not_balanced_tag + ToEol())

subsets_tag = Group("subsets_tag", Str("SUBSETS OF REACTIONS"))
subsets_line = Group("subsets_line", subsets_tag + ToEol())

# The reduced-system heading additionally carries two numbers on the same
# line: non-digit text, digits captured as "branch_points", more non-digit
# text, then a second unnamed run of digits.
reduced_system_tag = Group("reduced_system_tag", Str("REDUCED SYSTEM"))
reduced_system_line = Group(
    "reduced_system_line",
    reduced_system_tag
    + Rep1(AnyBut(digits))
    + Digits("branch_points")
    + Rep1(AnyBut(digits))
    + Digits()
    + ToEol()
)

kernel_tag = Group("kernel_tag", Str("KERNEL"))
kernel_line = Group("kernel_line", kernel_tag + ToEol())
120   
# Remaining section heading tags.  Each *_tag is a named group around a
# literal string; each *_line is that tag followed by the rest of the line.

convex_basis_tag = Group("convex_basis_tag", Str("CONVEX BASIS"))
convex_basis_line = Group("convex_basis_line",
                          convex_basis_tag + ToEol())

conservation_relations_tag = Group("conservation_relations_tag",
                                   Str("CONSERVATION RELATIONS"))
conservation_relations_line = Group("conservation_relations_line",
                                    conservation_relations_tag + ToEol())

elementary_modes_tag = Group("elementary_modes_tag",
                             Str("ELEMENTARY MODES"))
elementary_modes_line = Group("elementary_modes_line",
                              elementary_modes_tag + ToEol())
134   
# Matrix header of the form "matrix dimension r<rows> x c<cols>".  The two
# numbers are captured as the groups "num_rows" and "num_cols".
num_rows = Group("num_rows", Digits())
num_cols = Group("num_cols", Digits())
matrix_header = Group(
    "matrix_header",
    optional_blank_space
    + Str("matrix dimension")
    + blank_space
    + Any("r")
    + num_rows
    + blank_space
    + Any("x")
    + blank_space
    + Any("c")
    + num_cols
    + optional_blank_space
    + AnyEol()
)

matrix_element = Group("matrix_element", Integer())

# NOTE(review): the repeat bounds below are the *names* "num_cols" and
# "num_rows" rather than numbers -- presumably Martel resolves them to the
# values captured by matrix_header; confirm against the Martel documentation.
matrix_row = Group(
    "matrix_row",
    MaxRepeat(optional_blank_space + matrix_element, "num_cols", "num_cols")
    + ToEol()
)
matrix = Group("matrix", MaxRepeat(matrix_row, "num_rows", "num_rows"))

matrix_block = Group("matrix_block", matrix_header + matrix)

# A single row of "num_cols" elements; unlike matrix_row, each element must
# be preceded by at least one blank.
irreversible_vector = Group(
    "irreversible_vector",
    MaxRepeat(blank_space + matrix_element, "num_cols", "num_cols")
    + ToEol()
)
150   
# Separators used on the "not balanced" data line: a single space may occur
# inside one entry, while a tab or a run of spaces (repeat bounds 2 and 80)
# separates entries.
little_gap = Str(" ")
big_gap = Alt(Str("\t"), MaxRepeat(Str(" "), 2, 80))

# One entry: a run of non-whitespace characters, optionally followed by a
# single space and a second run of non-whitespace characters.
unbalanced_metabolite = Group(
    "unbalanced_metabolite",
    Rep1(AnyBut(white_space))
    + Opt(little_gap + Rep1(AnyBut(white_space)))
)

# One entry followed by any number of big_gap-separated further entries.
not_balanced_data = Group(
    "not_balanced_data",
    optional_blank_space
    + unbalanced_metabolite
    + Rep(big_gap + unbalanced_metabolite)
    + ToEol()
)
158   
# Heading line starting with "->" and the column-title line
# "met  cons  built  reactions" that follows it.
metabolite_roles_heading = Group("metabolite_roles_heading",
                                 Str("->") + ToEol())
metabolite_role_cols = Group(
    "metabolite_role_cols",
    optional_blank_space
    + Str("met")
    + blank_space
    + Str("cons")
    + blank_space
    + Str("built")
    + blank_space
    + Str("reactions")
    + ToEol()
)


def _metabolite_role_row(group_name):
    """Return a named group for one row of a metabolite role table.

    The row is: a run of non-whitespace characters, blanks, three
    blank-terminated runs of digits, one or more characters from "ir",
    then the rest of the line.  A fresh expression is built on every call.
    """
    return Group(
        group_name,
        optional_blank_space
        + Rep1(AnyBut(white_space))
        + blank_space
        + RepN(Digits() + blank_space, 3)
        + Rep1(Any("ir"))
        + ToEol()
    )


# Branch and non-branch rows share one layout; only the group names differ.
branch_metabolite = _metabolite_role_row("branch_metabolite")
non_branch_metabolite = _metabolite_role_row("non_branch_metabolite")

branch_metabolite_block = Group(
    "branch_metabolite_block",
    metabolite_roles_heading
    + metabolite_role_cols
    + Rep(branch_metabolite)
)
non_branch_metabolite_block = Group(
    "non_branch_metabolite_block",
    metabolite_roles_heading
    + metabolite_role_cols
    + Rep(non_branch_metabolite)
)
177   
# Filler at the end of each section: a repetition of whole lines, each one
# guarded by an assertion on the tag(s) of the section(s) that may follow.
# NOTE(review): the second argument to Expression.Assert is 1 throughout --
# presumably Martel's "invert" flag, i.e. "the tag does NOT start here";
# confirm against the Martel documentation.

# The group name below is spelled "end_stochiometric" (unlike the variable
# name).  It is a runtime tag name and is deliberately reproduced as-is.
end_stoichiometric = Group(
    "end_stochiometric",
    Rep(
        Expression.Assert(not_balanced_tag, 1)
        + Expression.Assert(kernel_tag, 1)
        + ToEol()
    )
)

end_not_balanced = Group(
    "end_not_balanced",
    Rep(Expression.Assert(kernel_tag, 1) + ToEol())
)

end_kernel = Group(
    "end_kernel",
    Rep(Expression.Assert(subsets_tag, 1) + ToEol())
)

end_subsets = Group(
    "end_subsets",
    Rep(Expression.Assert(reduced_system_tag, 1) + ToEol())
)

end_reduced_system = Group(
    "end_reduced_system",
    Rep(Expression.Assert(convex_basis_tag, 1) + ToEol())
)

end_convex_basis = Group(
    "end_convex_basis",
    Rep(Expression.Assert(conservation_relations_tag, 1) + ToEol())
)

end_conservation_relations = Group(
    "end_conservation_relations",
    Rep(Expression.Assert(elementary_modes_tag, 1) + ToEol())
)

# Last section of the record: no following tag to guard against, so this is
# an unguarded repetition of lines.
end_elementary_modes = Group("end_elementary_modes", Rep(ToEol()))
195   
# Section-level blocks, part 1: each combines a heading/count line with the
# content rules defined earlier, interleaved with optional blank lines.

input_file_block = Group("input_file_block",
                         input_file_line + Rep(blank_line))

# The banner must be followed by at least one blank line (Rep1).
metatool_block = Group("metatool_block",
                       metatool_line + Rep1(blank_line))

metabolite_count_block = Group("metabolite_count_block",
                               metabolite_count_line + Rep(blank_line))

# The reaction count is followed by the metabolite list and the graph
# structure table, all inside the same group.
reaction_count_block = Group(
    "reaction_count_block",
    reaction_count_line
    + Rep(blank_line)
    + metabolites_block
    + Rep(blank_line)
    + graph_structure_block
    + Rep(blank_line)
)

# Stoichiometric matrix, one further line consumed by ToEol(), the
# irreversible vector, then the end-of-section filler.
stoichiometric_block = Group(
    "stoichiometric_block",
    stoichiometric_line
    + Rep(blank_line)
    + matrix_block
    + ToEol()
    + irreversible_vector
    + end_stoichiometric
)

not_balanced_block = Group(
    "not_balanced_block",
    not_balanced_line
    + Rep(blank_line)
    + not_balanced_data
    + Rep(blank_line)
)
# Section-level blocks, part 2.  The kernel, subsets, convex basis and
# elementary modes sections all consist of a heading, a matrix, an enzymes
# list and a reactions list.  They differ in whether the line after the
# matrix is mandatory (ToEol()) or optional (Opt(ToEol())), and in their
# end-of-section filler.

kernel_block = Group(
    "kernel_block",
    kernel_line
    + Rep(blank_line)
    + matrix_block
    + ToEol()
    + Rep(blank_line)
    + enzymes_block
    + Rep(blank_line)
    + reactions_block
    + end_kernel
)

subsets_block = Group(
    "subsets_block",
    subsets_line
    + Rep(blank_line)
    + matrix_block
    + ToEol()
    + Rep(blank_line)
    + enzymes_block
    + Rep(blank_line)
    + reactions_block
    + end_subsets
)

# Reduced system: matrix, irreversible vector, then the branch and
# non-branch metabolite role tables.
reduced_system_block = Group(
    "reduced_system_block",
    reduced_system_line
    + Rep(blank_line)
    + matrix_block
    + ToEol()
    + irreversible_vector
    + Rep(blank_line)
    + branch_metabolite_block
    + Rep(blank_line)
    + non_branch_metabolite_block
    + end_reduced_system
)

convex_basis_block = Group(
    "convex_basis_block",
    convex_basis_line
    + Rep(blank_line)
    + matrix_block
    + Opt(ToEol())
    + Rep(blank_line)
    + enzymes_block
    + Rep(blank_line)
    + reactions_block
    + end_convex_basis
)

# Conservation relations: matrix followed by the sum-is-constant lines
# instead of enzyme/reaction lists.
conservation_relations_block = Group(
    "conservation_relations_block",
    conservation_relations_line
    + Rep(blank_line)
    + matrix_block
    + Rep(blank_line)
    + sum_is_constant_block
    + end_conservation_relations
)

elementary_modes_block = Group(
    "elementary_modes_block",
    elementary_modes_line
    + Rep(blank_line)
    + matrix_block
    + Opt(ToEol())
    + Rep(blank_line)
    + enzymes_block
    + Rep(blank_line)
    + reactions_block
    + end_elementary_modes
)
230   
231   
# Top-level expression for one complete MetaTool output record: the section
# blocks in fixed order.  Only the "not balanced" section is optional.
metatool_record = Group(
    "metatool_record",
    metatool_block
    + input_file_block
    + metabolite_count_block
    + reaction_count_block
    + stoichiometric_block
    + Opt(not_balanced_block)
    + kernel_block
    + subsets_block
    + reduced_system_block
    + convex_basis_block
    + conservation_relations_block
    + elementary_modes_block
)
237