Package Bio :: Module Decode
[hide private]
[frames] | [no frames]

Source Code for Module Bio.Decode

  1  # Copyright 2002 by Andrew Dalke. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Decode elements from a Std/Martel parsed XML stream (OBSOLETE). 
  7   
  8  Andrew Dalke is no longer maintaining Martel or Bio.Mindy, and these modules 
  9  (and therefore Bio.Decode) have been deprecated.  They are no longer used in 
 10  any of the current Biopython parsers, and are likely to be removed in a 
 11  future release.""" 
 12   
 13  import warnings 
 14  warnings.warn("Martel and those parts of Biopython depending on it" \ 
 15                +" directly (such as Bio.Mindy and Bio.Decode) are now" \ 
 16                +" deprecated, and will be removed in a future release of"\ 
 17                +" Biopython.  If you want to continue to use this code,"\ 
 18                +" please get in contact with the Biopython developers via"\ 
 19                +" the mailing lists to avoid its permanent removal from"\ 
 20                +" Biopython.", \ 
 21                DeprecationWarning) 
 22   
 23  import string 
 24  from Bio.Parsers.spark import GenericScanner, GenericParser 
 25   
26 -def unescape_C(s):
27 result = [] 28 for i in range(len(s)): 29 if s[i] != "\\": 30 result.append(s[i]) 31 continue 32 c = s[i+1:i+2] 33 if c == "x": 34 x = s[i+2:i+4] 35 if len(x) != 2: 36 raise ValueError("invalid \\x escape") 37 i = int(x, 16) 38 result.append(chr(i)) 39 continue 40 if c in "01234567": 41 x = s[i+1:i+4] 42 # \octals don't do a length assertion check 43 i = int(x, 8) 44 result.append(chr(i)) 45 continue 46 result.append(c) 47 return "".join(result)
48
def join_english(fields):
    """Join word fragments using English-ish spacing heuristics.

    Adjacent fragments are concatenated directly when the accumulated
    text ends in a hyphenated run (e.g. "x-y-"), or when neither side
    contains a space; otherwise they are joined with a single space.
    The final result has its whitespace collapsed and trimmed.
    """
    if not fields:
        return ""
    joined = fields[0]
    for piece in fields[1:]:
        hyphen_run = joined[-1:] == "-" and joined[-3:-2] == "-"
        both_wordlike = " " not in joined and " " not in piece
        if hyphen_run or both_wordlike:
            joined = joined + piece
        else:
            joined = joined + " " + piece
    return (" ".join(joined.split())).strip()
62 63 64
def chomp(s, c):
    """Return s with one trailing character c removed, if present."""
    return s[:-1] if s[-1:] == c else s
69
def lchomp(s, c):
    """Return s with one leading character c removed, if present."""
    return s[1:] if s[:1] == c else s
74
def chompchomp(s, c):
    """Strip one leading AND one trailing c, only when both are present."""
    wrapped = s[:1] == c and s[-1:] == c
    return s[1:-1] if wrapped else s
79
def fixspaces(s):
    """Collapse runs of whitespace in s to single spaces and trim ends."""
    words = s.split()
    return " ".join(words).strip()
85
def join_fixspaces(lines):
    """Join lines with spaces, then collapse and trim all whitespace."""
    merged = " ".join(lines)
    return " ".join(merged.split()).strip()
88
def tr(s, frm, to):
    """Translate characters of s: each character of frm is replaced by
    the character of to at the same position (like the Unix tr utility).

    Fix: string.maketrans() was removed in Python 3; fall back to the
    equivalent str.maketrans() there while keeping Python 2 behavior.
    """
    try:
        table = string.maketrans(frm, to)  # Python 2
    except AttributeError:
        table = str.maketrans(frm, to)     # Python 3
    return s.translate(table)
92
def safe_int(s):
    """Convert s to an integer.

    On Python 2 this falls back to long() for values int() rejects.
    Fix: on Python 3 the 'long' builtin no longer exists, so the old
    fallback raised NameError instead of the expected ValueError; here
    the fallback degrades to int (unbounded on Python 3), which simply
    re-raises the original ValueError for genuinely bad input.
    """
    try:
        return int(s)
    except ValueError:
        try:
            long_type = long          # Python 2
        except NameError:
            long_type = int           # Python 3: int is unbounded
        return long_type(s)
# Registry of named decode operations available in the decode-spec
# language (see make_decoder / make_typechecked_decoder).  Each entry
# maps a spec name to (function, input_type_or_types, output_type_or_types);
# _fixup_defs() below normalizes the type fields to tuples.
decode_functions = {
    # String operations
    "chomp": (chomp, str, str),
    "chompchomp": (chompchomp, str, str),
    "chop": (lambda s: s[:-1], str, str),
    "chopchop": (lambda s: s[1:-1], str, str),
    "fixspaces": (fixspaces, str, str),
    "lchomp": (lchomp, str, str),
    "lchop": (lambda s: s[1:], str, str),
    "lower": (lambda s: s.lower(), str, str),
    "lstrip": (lambda s: s.lstrip(), str, str),
    "replace": (lambda s, old, new: s.replace(old, new), str, str),
    "rstrip": (lambda s: s.rstrip(), str, str),
    "str": (str, str, str),
    "strip": (lambda s: s.strip(), str, str),
    "tr": (tr, str, str),
    "unescape.c": (unescape_C, str, str),
    "unescape.doublequote": (lambda s: s.replace('""', '"'), str, str),
    "unescape.singlequote": (lambda s: s.replace("''", "'"), str, str),
    "upper": (lambda s: s.upper(), str, str),

    # List operations
    "join": (lambda lst, s = " ": s.join(lst), list, str),
    "join.english": (join_english, list, str),

    # Integer operations
    "int": (safe_int, [float, str, int], int),
    "int.comma": (lambda s: safe_int(s.replace(",", "")),
                  [float, str, int], int),
    # NOTE(review): hex()/oct() take an int, not a str, yet the declared
    # input type here is str -- these two entries look broken; confirm
    # against any remaining callers before relying on them.
    "hex": (hex, str, int),
    "oct": (oct, str, int),
    "add": ((lambda i, j: i+j), int, int),

    # Float operations
    "float": (float, (float, str, int), float),

    }
def _fixup_defs():
    """Normalize decode_functions in place: force each entry's 2nd and
    3rd members (input/output types) into tuples."""
    def _as_tuple(types):
        # Accept a list, a tuple, or a bare type.
        if isinstance(types, list):
            return tuple(types)
        if not isinstance(types, tuple):
            return (types,)
        return types

    for name, (func, in_types, out_types) in decode_functions.items():
        decode_functions[name] = (func, _as_tuple(in_types),
                                  _as_tuple(out_types))
_fixup_defs()
class Token:
    """Base class for scanner tokens, identified only by a type string.

    The parser matches tokens against grammar symbol names via the
    comparison hook below.
    """

    def __init__(self, type):
        self.type = type

    def __cmp__(self, other):
        # Python 2 comparison hook: a token compares equal to its own
        # type string (e.g. "pipe"), which is how the spark-based parser
        # recognizes it.
        return cmp(self.type, other)

    def __repr__(self):
        return "Token(%r)" % (self.type,)
161
class ValueToken(Token):
    """A token that also carries a parsed value (string, int or float)."""

    def __init__(self, type, val):
        Token.__init__(self, type)
        self.val = val

    def __cmp__(self, other):
        # Comparison deliberately ignores the value: only the token type
        # matters for grammar matching.
        return cmp(self.type, other)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.val)

    def __str__(self):
        return str(self.val)
172
class Integer(ValueToken):
    """Token holding an integer literal from the decode spec."""

    def __init__(self, val):
        ValueToken.__init__(self, "integer", val)
176
class Float(ValueToken):
    """Token holding a floating-point literal from the decode spec."""

    def __init__(self, val):
        ValueToken.__init__(self, "float", val)
180
class String(ValueToken):
    """Token holding an unescaped string literal from the decode spec."""

    def __init__(self, val):
        ValueToken.__init__(self, "string", val)
184
class FunctionName(ValueToken):
    """Token holding a decode-function name (e.g. "chomp", "unescape.c")."""

    def __init__(self, val):
        ValueToken.__init__(self, "functionname", val)
188
189 -class DecodeScanner(GenericScanner):
190 - def __init__(self):
192
    def tokenize(self, input):
        # Reset the token accumulator; the t_* handlers append to
        # self.rv as GenericScanner drives them over the input.
        self.rv = []
        GenericScanner.tokenize(self, input)
        return self.rv
197
    def t_functionname(self, input):
        r" \w+(\.\w+)*"
        # The raw-string docstring above is the token regexp consumed by
        # spark's GenericScanner -- it is behavior, not documentation.
        # Matches dotted names like "unescape.c".
        self.rv.append(FunctionName(input))
201
    def t_pipe(self, input):
        r" \| "
        # "|" chains decode stages; the docstring is the spark token regexp.
        self.rv.append(Token("pipe"))
205
    def t_open_paren(self, input):
        r" \( "
        # Opens an argument list; the docstring is the spark token regexp.
        self.rv.append(Token("open_paren"))
209
    def t_close_paren(self, input):
        r" \) "
        # Closes an argument list; the docstring is the spark token regexp.
        self.rv.append(Token("close_paren"))
213
    def t_comma(self, input):
        r" , "
        # Separates arguments; the docstring is the spark token regexp.
        self.rv.append(Token("comma"))
217
    def t_whitespace(self, input):
        r" \s+ "
        # Whitespace between tokens is consumed and produces no token.
        pass
221
    def t_string(self, input):
        r""" "([^"\\]+|\\.)*"|'([^'\\]+|\\.)*' """
        # "' # emacs cruft
        # The regexp docstring matches single- or double-quoted literals
        # with backslash escapes.  Strip the surrounding quotes, then
        # expand C-style escapes before emitting the token.
        s = input[1:-1]
        s = unescape_C(s)

        self.rv.append(String(s))
229
    def t_float(self, input):
        r""" [+-]?((\d+(\.\d*)?)|\.\d+)([eE][+-]?[0-9]+)? """
        # The pattern also matches plain integers; prefer an Integer
        # token and fall back to Float only when int conversion fails.
        try:
            self.rv.append(Integer(safe_int(input)))
        except ValueError:
            self.rv.append(Float(float(input)))
237
class Function:
    """A parsed decode-spec stage: a function name plus its extra args.

    The string form renders as "name(x, <args>)", where x stands for the
    value flowing through the pipeline.
    """

    def __init__(self, name, args = ()):
        self.name = name
        self.args = args

    def __str__(self):
        rendered = str(self.args)[1:-1] if self.args else ""
        return "%s(x, %s)" % (self.name, rendered)

    __repr__ = __str__
250
class DecodeParser(GenericParser):
    """Parser for decode specs: pipe-separated function calls.

    The p_* docstrings are the grammar rules consumed by spark's
    GenericParser -- they are behavior, not documentation.  Parsing a
    token list yields a list of Function objects, one per pipe stage.
    """

    def __init__(self, start = "expression"):
        GenericParser.__init__(self, start)
        self.begin_pos = 0

    def p_expression(self, args):
        """
        expression ::= term
        expression ::= term pipe expression
        """
        rest = args[2] if len(args) == 3 else []
        return [args[0]] + rest

    def p_term(self, args):
        """
        term ::= functionname
        term ::= functionname open_paren args close_paren
        """
        if len(args) == 1:
            return Function(args[0].val)
        return Function(args[0].val, tuple(tok.val for tok in args[2]))

    def p_args(self, args):
        """
        args ::= arg
        args ::= arg comma args
        """
        rest = args[2] if len(args) == 3 else []
        return [args[0]] + rest

    def p_arg(self, args):
        """
        arg ::= string
        arg ::= integer
        arg ::= float
        """
        # A bare argument: pass the token through unchanged.
        return args[0]
290
def scan(input):
    """Tokenize a decode-spec string into a list of Token objects."""
    return DecodeScanner().tokenize(input)
294
def parse(tokens):
    """Parse scanned tokens into a list of Function pipeline stages."""
    return DecodeParser().parse(tokens)
# Cache of compiled decoder callables, keyed by the decode-spec string
# passed to make_decoder().
_decoder_cache = {}
class FunctionCall:
    """Bind extra arguments to a single decode function.

    Calling the instance with x invokes f(x, *args).
    """

    def __init__(self, f, args):
        self.f = f
        self.args = args

    def __call__(self, x):
        func = self.f
        return func(x, *self.args)
307
class FunctionCallChain:
    """Compose two decode stages: apply inner_f first, then f with args.

    Calling the instance with x yields f(inner_f(x), *args).
    """

    def __init__(self, inner_f, f, args):
        self.inner_f = inner_f
        self.f = f
        self.args = args

    def __call__(self, x):
        intermediate = self.inner_f(x)
        return self.f(intermediate, *self.args)
315 316 #### I don't think this is the right way to do things 317 ##class CheckTypes: 318 ## def __init__(self, f, call_types, return_types): 319 ## self.f = f 320 ## self.call_types = call_types 321 ## self.return_types = return_types 322 ## def __call__(self, x): 323 ## if self.call_types is not None: 324 ## for T in self.call_types: 325 ## if isinstance(x, T): 326 ## break 327 ## else: 328 ## raise TypeError( 329 ## "Call value %s of type %s, expecting one of %s" % 330 ## (x, type(x).__name__, 331 ## [T.name for T in self.call_types])) 332 ## y = self.f(x) 333 334 ## if not self.return_types: 335 ## return y 336 337 ## for T in self.return_types: 338 ## if isinstance(y, T): 339 ## return y 340 ## raise TypeError("Return value %s of type %s, expecting one of %s" % 341 ## (y, type(y).__name__, 342 ## [T.name for T in self.return_types])) 343
def make_decoder(s):
    """Compile a decode-spec string (e.g. 'chomp("|")|chop') into a
    one-argument callable.  Compiled decoders are cached per spec.
    """
    cached = _decoder_cache.get(s)
    if cached is not None:
        return cached

    pipeline = parse(scan(s))

    # Seed the chain with the first stage, wrapping it only when it
    # takes extra arguments.
    first = pipeline[0]
    decoder = decode_functions[first.name][0]
    if first.args:
        decoder = FunctionCall(decoder, first.args)

    # Fold every remaining stage around the chain built so far.
    for stage in pipeline[1:]:
        decoder = FunctionCallChain(decoder,
                                    decode_functions[stage.name][0],
                                    stage.args)

    _decoder_cache[s] = decoder
    return decoder
361
362 -def _verify_subtypes(subset, total, old_name, new_name):
363 for x in subset: 364 if x not in total: 365 raise TypeError("%s can produce a %r value not accepted by %s" % 366 (old_name, x.__name__, new_name))
# Cache for make_typechecked_decoder(), keyed by the full
# (spec, input_types, output_types) triple.
_typechecked_decoder_cache = {}
def make_typechecked_decoder(s, input_types = None, output_types = None):
    """Compile decode spec s like make_decoder, verifying that the type
    declared for each stage accepts everything the previous stage (or
    the caller-declared input) can produce.

    input_types / output_types may each be a single type or a tuple of
    types.  Raises TypeError on any stage-to-stage type mismatch.
    Results are cached on (s, input_types, output_types).
    """
    cache_lookup = (s, input_types, output_types)
    try:
        return _typechecked_decoder_cache[cache_lookup]
    except KeyError:
        pass
    # Normalize bare types to 1-tuples so the membership checks below
    # can iterate uniformly.
    if input_types is not None and not isinstance(input_types, type( () )):
        input_types = (input_types,)
    if output_types is not None and not isinstance(output_types, type( () )):
        output_types = (output_types,)

    functions = parse(scan(s))

    # Make sure the input type(s) are allowed by the first stage.
    f = functions[0]
    fc, in_types, out_types = decode_functions[f.name]
    if input_types is not None:
        for x in input_types:
            if x not in in_types:
                raise TypeError(
                    "the input type includes %r which isn't supported by %s" %
                    (x.__name__, f.name))

    # Do the composition, threading each stage's declared output types
    # forward as the next stage's expected input.
    old_name = f.name
    input_types = out_types
    args = functions[0].args
    if args:
        fc = FunctionCall(fc, args)

    for f in functions[1:]:
        transform_func, in_types, out_types = decode_functions[f.name]
        _verify_subtypes(input_types, in_types, old_name, f.name)
        old_name = f.name
        input_types = out_types
        fc = FunctionCallChain(fc, transform_func, f.args)

    # Finally check the last stage against the caller's declared output.
    if output_types is not None:
        _verify_subtypes(input_types, output_types, old_name, "the output")
    _typechecked_decoder_cache[cache_lookup] = fc
    return fc
410 411
def test():
    """Minimal self-test of the decode-spec compiler (run on __main__)."""
    assert make_decoder("chop")("Andrew") == "Andre"
    assert make_decoder("int")("9") == 9
    assert make_decoder('join(" ")')(["Andrew", "Dalke"]) == \
           "Andrew Dalke"
    assert make_decoder('chomp("|")')("|test|") == "|test"
    assert make_decoder('chomp("|")')("|test") == "|test"
    # Pipe chains apply left-to-right, so order matters below.
    assert make_decoder('chomp("A")|chop')("BA") == ""
    assert make_decoder('chomp("A")|chop')("AB") == "A"
    assert make_decoder('chop|chomp("A")')("AB") == ""
    assert make_decoder('chop|chomp("A")')("BA") == "B"
    assert make_decoder('add(5)')(2) == 7
    assert make_decoder('add(-2)')(5) == 3
425 426 if __name__ == "__main__": 427 test() 428