Package Bio :: Package MetaTool :: Module metatool_format
[hide private]
[frames | no frames]

Source Code for Module Bio.MetaTool.metatool_format

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Martel based parser to read MetaTool output files. 
  7   
  8  This is a huge regular expression for MetaTool 3.5 output, built using 
  9  the 'regular expressions on steroids' capabilities of Martel. 
 10   
 11  http://www2.bioinf.mdc-berlin.de/metabolic/metatool/ 
 12   
 13   
 14  This helps us have endlines be consistent across platforms. 
 15   
 16  """ 
 17  # standard library 
 18  import string 
 19   
 20  # Martel 
 21  from Martel import Opt, Alt, Digits, Integer, Group, Str, MaxRepeat 
 22  from Martel import Any, AnyBut, RepN, Rep, Rep1, ToEol, AnyEol 
 23  from Martel import Expression 
 24  from Martel import RecordReader 
 25  # Character-level building blocks reused by the line patterns that follow. 
 26  blank = ' ' 
 27  tab = '\t' 
 28  blank_space = MaxRepeat( Any( blank + tab), 1, 80 )  # bounds 1, 80: presumably min/max repeat counts -- confirm against Martel 
 29  optional_blank_space = Rep( Any( blank + tab ) ) 
 30  white_space = " \t" + chr( 10 ) + chr( 13 )  # plain string: space, tab, LF, CR (handed to AnyBut by later patterns) 
 31  blank_line = optional_blank_space + AnyEol() 
 32  lower_case_letter = Group( "lower_case_letter", Any( "abcdefghijklmnopqrstuvwxyz" ) ) 
 33  digits = "0123456789"  # plain string, handed to AnyBut in reduced_system_line 
 34  # enzyme and reaction are the same pattern (Digits, ':', ToEol()) under different group names. 
 35  enzyme = Group( "enzyme", optional_blank_space + Digits() + 
 36      optional_blank_space + Str( ':' ) + ToEol() ) 
 37  reaction = Group( "reaction", optional_blank_space + Digits() + 
 38      optional_blank_space + Str( ":" ) + ToEol() ) 
 39  not_found_line = Group( "not_found_line", optional_blank_space + Str( "- not found -" ) + 
 40      ToEol() )  # alternative to a list of entries in both lists below 
 41  # "enzymes" section: header line, Rep( blank_line ), then Rep1( enzyme ) or the "- not found -" line. 
 42  enzymes_header = Group( "enzymes_header", optional_blank_space + Str( "enzymes" ) + 
 43       ToEol() ) 
 44  enzymes_list = Group( "enzymes_list", Alt( Rep1( enzyme ), \ 
 45      not_found_line ) ) 
 46  enzymes_block = Group( "enzymes_block", enzymes_header + Rep( blank_line ) + 
 47      enzymes_list ) 
 48  # "overall reaction" section: same layout as the enzymes section, with reaction entries. 
 49  reactions_header = Group( "reactions_header", optional_blank_space + 
 50      Str( "overall reaction" ) + ToEol() ) 
 51  reactions_list = Group( "reactions_list", Alt( Rep1( reaction ), \ 
 52      not_found_line ) ) 
 53  reactions_block = Group( "reactions_block", reactions_header + Rep( blank_line ) + 
 54      reactions_list ) 
 55  # Banner line: "METATOOL OUTPUT", "Version", then <digits>.<digits> plus an Opt() lower-case revision letter. 
 56  rev = Group( "rev", Opt( lower_case_letter ) ) 
 57  version = Group( "version", Digits( "version_major") + Any( "." ) + 
 58      Digits( "version_minor") + rev ) 
 59  metatool_tag = Str( "METATOOL OUTPUT" ) 
 60  metatool_line = Group( "metatool_line", metatool_tag + blank_space + 
 61      Str( "Version" ) + blank_space + version + ToEol() ) 
 62  # "INPUT FILE:" line; the trailing ToEol is given the name input_file_name. 
 63  input_file_tag = Str( "INPUT FILE:" ) 
 64  input_file_line = Group( "input_file_line", input_file_tag + blank_space + 
 65      ToEol( "input_file_name" ) ) 
 66  # "INTERNAL METABOLITES:" line; its Digits are named num_int_metabolites. 
 67  metabolite_count_tag = Str( "INTERNAL METABOLITES:" ) 
 68  metabolite_count_line = Group( "metabolite_count_line",  metabolite_count_tag + 
 69      blank_space + Digits( "num_int_metabolites" ) + ToEol() ) 
 70  # "REACTIONS:" line; its Digits are named num_reactions. 
 71  reaction_count_tag = Str( "REACTIONS:" ) 
 72  reaction_count_line = Group( "reaction_count_line", reaction_count_tag + blank_space + 
 73      Digits( "num_reactions" ) + ToEol() ) 
 74  # Metabolite table: rows of <digits> <"int"|"external"> <non-whitespace run>, closed by a "<digits> metabolites" line. 
 75  type_metabolite = Group( "type_metabolite", Alt( Str( "int" ), \ 
 76      Str( "external" ) ) ) 
 77  metabolite_info = Group( "metabolite_info", optional_blank_space + 
 78      Digits() + blank_space + type_metabolite + blank_space + 
 79  #    Integer() + blank_space + Rep1( lower_case_letter ) + 
 80      Rep1( AnyBut( white_space ) ) ) 
 81  metabolite_line = Group( "metabolite_line", metabolite_info + ToEol() ) 
 82  metabolites_summary = Group( "metabolites_summary", optional_blank_space + Digits() + 
 83      blank_space + Str( "metabolites" ) + ToEol() ) 
 84  metabolites_block = Group( "metabolites_block", Rep1( metabolite_line ) + 
 85      metabolites_summary + Rep( blank_line ) ) 
 86  # Graph structure table: "edges" / "frequency of nodes" heading, then rows of two Digits (edge_count, num_nodes). 
 87  graph_structure_heading = Group( "graph_structure_heading", optional_blank_space + 
 88      Str( "edges" ) + blank_space + Str( "frequency of nodes" ) + ToEol() ) 
 89  graph_structure_line = Group( "graph_structure_line", optional_blank_space + 
 90      Digits( "edge_count" ) + blank_space + Digits( "num_nodes" ) + ToEol() ) 
 91  graph_structure_block =  Group( "graph_structure_block", \ 
 92      graph_structure_heading + Rep( blank_line ) + 
 93      Rep1( graph_structure_line ) + Rep( blank_line ) ) 
 94  # Sum line: "<digits> : <term> + <term> ... =" followed by ToEol(); each term is a run of non-whitespace characters. 
 95  sum_is_constant_line = Group( "sum_is_constant_line", optional_blank_space + 
 96      Digits() + optional_blank_space + Any( ":" ) + optional_blank_space + 
 97      Rep1( AnyBut( white_space ) ) + 
 98      Rep( blank_space + Any( "+" ) + blank_space + Rep1( AnyBut( white_space ) ) ) + 
 99      optional_blank_space + Str( "=" ) + ToEol() ) 
100  sum_is_constant_block = Group( "sum_is_constant_block", Rep( sum_is_constant_line ) )  # used by conservation_relations_block 
101   
102  # Section banners: each *_tag wraps the literal heading text; each *_line is that tag followed by ToEol(). 
103  stoichiometric_tag = Group( "stoichiometric_tag", Str( "STOICHIOMETRIC MATRIX" ) ) 
104  stoichiometric_line = Group( "stoichiometric_line", stoichiometric_tag + 
105      ToEol() ) 
106   
107  not_balanced_tag = Group( "not_balanced_tag", Str( "NOT BALANCED INTERNAL METABOLITES" ) ) 
108  not_balanced_line = Group( "not_balanced_line", not_balanced_tag + 
109      ToEol() ) 
110   
111  subsets_tag = Group( "subsets_tag", Str( "SUBSETS OF REACTIONS" ) ) 
112  subsets_line = Group( "subsets_line", \ 
113       subsets_tag + ToEol() ) 
114  # Unlike the other banners, the REDUCED SYSTEM line also holds two numbers; the first Digits is named branch_points. 
115  reduced_system_tag = Group( "reduced_system_tag", Str( "REDUCED SYSTEM" ) ) 
116  reduced_system_line = Group( "reduced_system_line", reduced_system_tag + 
117      Rep1(  AnyBut( digits ) ) + Digits( "branch_points" ) + 
118      Rep1( AnyBut( digits ) ) + Digits() + ToEol() ) 
119   
120  kernel_tag = Group( "kernel_tag", Str( "KERNEL" ) ) 
121  kernel_line = Group( "kernel_line", kernel_tag + ToEol() ) 
122   
123  convex_basis_tag = Group( "convex_basis_tag", Str( "CONVEX BASIS" ) ) 
124  convex_basis_line = Group( "convex_basis_line", convex_basis_tag + 
125      ToEol() ) 
126   
127  conservation_relations_tag = Group( "conservation_relations_tag", \ 
128      Str( "CONSERVATION RELATIONS" ) ) 
129  conservation_relations_line = Group( "conservation_relations_line", \ 
130      conservation_relations_tag + ToEol() ) 
131   
132  elementary_modes_tag = Group( "elementary_modes_tag", \ 
133      Str( "ELEMENTARY MODES" ) ) 
134  elementary_modes_line = Group( "elementary_modes_line", \ 
135      elementary_modes_tag + ToEol() ) 
136  # Matrix: a "matrix dimension r<num_rows> x c<num_cols>" header line followed by rows of Integer elements. 
137  num_rows = Group( "num_rows", Digits() ) 
138  num_cols = Group( "num_cols", Digits() ) 
139  matrix_header = Group( "matrix_header", optional_blank_space + 
140      Str( "matrix dimension" ) + blank_space  + Any( "r" ) + 
141      num_rows + blank_space +  Any( "x" ) + blank_space + 
142      Any( "c" ) + num_cols + optional_blank_space + AnyEol() ) 
143  matrix_element = Group( "matrix_element", Integer() ) 
144  matrix_row = Group( "matrix_row", MaxRepeat( optional_blank_space + matrix_element, \ 
145      "num_cols", "num_cols" ) + ToEol() ) 
146  matrix = Group( "matrix", MaxRepeat( matrix_row, "num_rows", "num_rows" ) ) 
147  # NOTE(review): the string counts "num_rows"/"num_cols" passed to MaxRepeat above presumably refer to the groups matched in matrix_header -- confirm against Martel. 
148  matrix_block = Group( "matrix_block", matrix_header + matrix ) 
149  irreversible_vector = Group( "irreversible_vector", \ 
150      MaxRepeat( blank_space + matrix_element, "num_cols", "num_cols" ) +  
151      ToEol() )  # like matrix_row, but each element must be preceded by blank_space 
152  # NOT BALANCED data line: names separated by big_gap; a name may contain one little_gap (a single space). 
153  little_gap = Str( " " ) 
154  big_gap = Alt( Str( "\t" ), MaxRepeat( Str( " " ), 2, 80 ) )  # a tab, or a MaxRepeat of spaces with bounds 2, 80 
155  unbalanced_metabolite = Group( "unbalanced_metabolite", \ 
156      Rep1( AnyBut( white_space ) ) + Opt( little_gap + 
157      Rep1( AnyBut( white_space ) ) ) ) 
158  not_balanced_data = Group( "not_balanced_data", optional_blank_space + 
159      unbalanced_metabolite + Rep( big_gap + unbalanced_metabolite ) + ToEol() ) 
160  # Metabolite role tables: "->" heading line, "met cons built reactions" column line, then Rep() of rows. 
161  metabolite_roles_heading = Group( "metabolite_roles_heading", \ 
162      Str( "->" ) + ToEol() ) 
163  metabolite_role_cols = Group( "metabolite_role_cols", \ 
164      optional_blank_space + Str( "met" ) + blank_space + Str( "cons" ) + 
165      blank_space + Str( "built" ) + 
166      blank_space + Str( "reactions" ) + ToEol() ) 
167  branch_metabolite = Group( "branch_metabolite", optional_blank_space + 
168      Rep1( AnyBut( white_space ) ) + blank_space + 
169      RepN( Digits() + blank_space, 3 ) + Rep1( Any( "ir" ) ) + ToEol() )  # row: name, RepN of 3 Digits, then a run of 'i'/'r' characters 
170  non_branch_metabolite = Group( "non_branch_metabolite", optional_blank_space + 
171      Rep1( AnyBut( white_space ) ) + blank_space + 
172      RepN( Digits() + blank_space, 3 ) + Rep1( Any( "ir" ) ) + ToEol() )  # same pattern as branch_metabolite; only the group name differs 
173  branch_metabolite_block = Group( "branch_metabolite_block", \ 
174      metabolite_roles_heading + 
175      metabolite_role_cols + Rep( branch_metabolite ) ) 
176  non_branch_metabolite_block = Group( "non_branch_metabolite_block", \ 
177      metabolite_roles_heading + 
178      metabolite_role_cols + Rep( non_branch_metabolite ) ) 
179  # Section terminators: Rep() of ToEol() lines, each guarded by Expression.Assert( <tag>, 1 ) -- presumably a negative lookahead; confirm against Martel. 
180  end_stoichiometric = Group( "end_stochiometric", \ 
181      Rep( Expression.Assert( not_balanced_tag, 1 ) + 
182      Expression.Assert( kernel_tag, 1 ) + ToEol() ) )  # NOTE(review): group name is spelled "end_stochiometric", unlike the variable; left as-is because it is a runtime string 
183  end_not_balanced = Group( "end_not_balanced", \ 
184      Rep( Expression.Assert( kernel_tag, 1 ) + ToEol() ) )  # NOTE(review): not referenced by any block visible in this listing 
185  end_kernel = Group( "end_kernel", \ 
186      Rep( Expression.Assert( subsets_tag, 1 ) + ToEol() ) ) 
187  end_subsets = Group( "end_subsets", \ 
188      Rep( Expression.Assert( reduced_system_tag, 1 ) + ToEol() ) ) 
189  end_reduced_system = Group( "end_reduced_system", \ 
190      Rep( Expression.Assert( convex_basis_tag, 1 ) + ToEol() ) ) 
191  end_convex_basis = Group( "end_convex_basis", \ 
192      Rep( Expression.Assert( conservation_relations_tag, 1 ) + ToEol() ) ) 
193  end_conservation_relations = Group( "end_conservation_relations", \ 
194      Rep( Expression.Assert( elementary_modes_tag, 1 ) + ToEol() ) ) 
195  end_elementary_modes = Group( "end_elementary_modes", Rep( ToEol() ) )  # last section: Rep( ToEol() ) with no Assert guard 
196  #    Rep1( AnyBut( '.') ) + Str( "." ) ) 
197  # Section-level groups: each combines a banner or count line with the body patterns of that section. 
198  input_file_block = Group( "input_file_block", input_file_line + 
199      Rep( blank_line ) ) 
200  metatool_block = Group( "metatool_block", metatool_line + Rep1( blank_line ) )  # Rep1 of blank lines here; the other blocks use Rep 
201  # The blocks below are defined in the same order in which metatool_record concatenates them. 
202  metabolite_count_block = Group( "metabolite_count_block", \ 
203      metabolite_count_line + Rep( blank_line ) ) 
204  reaction_count_block = Group( "reaction_count_block", reaction_count_line + 
205      Rep( blank_line ) + metabolites_block + Rep( blank_line ) + 
206      graph_structure_block + Rep( blank_line ) )  # also contains the metabolite and graph structure tables 
207  stoichiometric_block = Group( "stoichiometric_block", stoichiometric_line + 
208      Rep( blank_line ) + matrix_block + ToEol() + irreversible_vector + 
209      end_stoichiometric ) 
210  not_balanced_block = Group( "not_balanced_block", not_balanced_line + 
211      Rep( blank_line ) + not_balanced_data + Rep( blank_line ) ) 
212  kernel_block = Group( "kernel_block", kernel_line + Rep( blank_line ) + 
213      matrix_block + ToEol() + Rep( blank_line ) + enzymes_block + 
214      Rep( blank_line ) + reactions_block + end_kernel ) 
215  subsets_block = Group( "subsets_block", subsets_line + Rep( blank_line ) + 
216      matrix_block + ToEol() + Rep( blank_line ) + enzymes_block + 
217      Rep( blank_line ) + reactions_block + end_subsets ) 
218  reduced_system_block = Group( "reduced_system_block", reduced_system_line + 
219      Rep( blank_line ) + matrix_block + ToEol() + irreversible_vector + 
220      Rep( blank_line ) + branch_metabolite_block + Rep( blank_line ) + 
221      non_branch_metabolite_block + end_reduced_system ) 
222  convex_basis_block = Group( "convex_basis_block", convex_basis_line + 
223      Rep( blank_line ) + matrix_block + Opt( ToEol() ) + Rep( blank_line ) + 
224      enzymes_block + Rep( blank_line ) + reactions_block + end_convex_basis )  # ToEol() after the matrix is wrapped in Opt() here, unlike kernel_block/subsets_block 
225  conservation_relations_block = Group( "conservation_relations_block", \ 
226      conservation_relations_line + Rep( blank_line ) + matrix_block + 
227      Rep( blank_line ) + sum_is_constant_block + 
228      end_conservation_relations )  # no ToEol() between matrix_block and the sum lines 
229  elementary_modes_block = Group( "elementary_modes_block", elementary_modes_line + 
230      Rep( blank_line ) + matrix_block + Opt( ToEol() ) + Rep( blank_line ) + 
231      enzymes_block + Rep( blank_line ) + reactions_block + end_elementary_modes )  # same layout as convex_basis_block 
232   
233  # Whole report: every section block concatenated in fixed order; only not_balanced_block is wrapped in Opt(). 
234  metatool_record = Group( "metatool_record", metatool_block + input_file_block + 
235     metabolite_count_block + reaction_count_block + stoichiometric_block + 
236      Opt( not_balanced_block ) + kernel_block + subsets_block + 
237      reduced_system_block + convex_basis_block + conservation_relations_block + 
238      elementary_modes_block ) 
239