Package Bio :: Module Std
[hide private]
[frames] | no frames]

Source Code for Module Bio.Std

  # This is a Python module.
"""This module is DEPRECATED.

Andrew Dalke is no longer maintaining Martel or Bio.Mindy, and these modules
and associated ones like Bio.Std are now deprecated.  They are no longer
used in any of the current Biopython parsers, and are likely to be removed
in a future release.
"""

import warnings

# Importing this module issues a DeprecationWarning.  The message is built
# from adjacent string literals, which the compiler joins into one string.
warnings.warn(
    "Martel and those parts of Biopython depending on it"
    " directly (such as Bio.Mindy and Bio.Std) are now"
    " deprecated, and will be removed in a future release of"
    " Biopython.  If you want to continue to use this code,"
    " please get in contact with the Biopython developers via"
    " the mailing lists to avoid its permanent removal from"
    " Biopython.",
    DeprecationWarning)

# Standard Bioformats definitions

import Martel

# Short alias used by every tag-building function in this module.
Group = Martel.Group

# Tag names are written as NS + "<local name>", i.e. "bioformat:<local name>".
namespace = "bioformat"
NS = namespace + ":"
XMLNS = "http://biopython.org/bioformat"

28 -def _set_if_given(attrs, field, d, valid = None, convert = None):
29 value = attrs.get(field) 30 if value is not None: 31 if valid is not None: 32 if value not in valid: 33 raise TypeError("%s (%r) must be one of %s" % \ 34 (field, value, valid)) 35 if convert is None: 36 d[field] = value 37 else: 38 d[field] = convert(value)
39
40 -def _complain_if_given(attrs, name):
41 if attrs.has_key(name) and attrs[name] is not None: 42 raise NotImplementedError("Don't yet handle %r" % (name,))
43
44 -def _must_have(expr, f):
45 tag = f.tag 46 if tag not in expr.group_names(): 47 raise TypeError( 48 "group %r not present in the expression but is required" % \ 49 (tag,))
50
51 -def _must_have_set(expr, sets):
52 names = expr.group_names() 53 for set in sets: 54 for f in set: 55 tag = f.tag 56 if tag not in names: 57 break 58 else: 59 return 60 if len(sets) == 1: 61 raise TypeError("missing required tags (need %s) in expression" % 62 [f.tag for f in sets[0]]) 63 lines = ["missing required tags in expression; must have one set from:"] 64 for set in sets: 65 lines.append( str( [t.tag for f in set] ) ) 66 s = "\n".join(lines) 67 raise TypeError(s)
68
69 -def _must_not_have(expr, f):
70 f.tag 71 if tag in expr.group_names(): 72 raise TypeError( 73 "group %r present in the expression but is not allowed" % \ 74 (tag,))
# pre- Python 2.2 functions didn't allow attributes.
# Probe for that by trying to set one on a throwaway function; _use_hack
# records the outcome (1 = attributes rejected, 0 = attributes accepted).
def _f():
    pass

try:
    _f.x = 1
except AttributeError:
    _use_hack = 1
else:
    _use_hack = 0
del _f

87 -def _check_name(f, text):
88 if text == "record": # XXX FIXME 89 return 90 assert NS + f.func_name == text, (NS + ":" + f.func_name, text)
91
92 -def _check_attrs(attrs, names):
93 for name in attrs.keys(): 94 if name not in names: 95 raise TypeError("attr %r is not allowed here (valid terms: %s)" % \ 96 (name, names)) 97 d = attrs.copy() 98 for name in names: 99 if not d.has_key(name): 100 d[name] = None 101 return d

# _settag(f, tag) attaches ``tag`` to a term function.  Which definition is
# used depends on whether function objects accept attributes (_use_hack).
if _use_hack:
    # Convert the functions into callable objects
    class StdTerm:
        """Callable wrapper that forwards every call to the wrapped function."""

        def __init__(self, func):
            self._func = func

        def __call__(self, *args, **kwargs):
            return self._func(*args, **kwargs)

    def _settag(f, tag):
        """Rebind the module-level name of *f* to a tagged StdTerm wrapper."""
        _check_name(f, tag)
        wrapper = StdTerm(f)
        globals()[f.func_name] = wrapper
        wrapper.tag = tag
else:
    def _settag(f, tag):
        """Store *tag* on function *f* after checking it matches f's name."""
        _check_name(f, tag)
        f.tag = tag


################ identifier, description, and cross-references
def record(expr, attrs={}):
    """Wrap *expr* in a group named "record".

    attrs may contain "format".  The group always carries the
    "xmlns:bioformat" attribute set to XMLNS; "format" is added when given.
    Raises TypeError (from _check_attrs) for any other attribute name.
    """
    checked = _check_attrs(attrs, ("format",))
    group_attrs = {"xmlns:bioformat": XMLNS}
    _set_if_given(checked, "format", group_attrs)
    return Group("record", expr, group_attrs)  # XXX FIXME
_settag(record, "record")  # XXX AND FIXME


def dbid(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:dbid`` group.

    attrs may contain "type" (one of "primary", "accession", "secondary"),
    "dbname" and "style".  Only "type" and "dbname" are copied onto the group.
    Raises TypeError for an unknown attribute name or an invalid "type".
    """
    # NOTE(review): "style" passes validation but is never stored or
    # rejected here -- confirm whether that is intentional.
    checked = _check_attrs(attrs, ("type", "style", "dbname"))
    allowed_types = ("primary", "accession", "secondary")
    group_attrs = {}
    _set_if_given(checked, "type", group_attrs, allowed_types)
    _set_if_given(checked, "dbname", group_attrs)
    return Group(NS + "dbid", expr, group_attrs)
_settag(dbid, NS + "dbid")

def description_block(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:description_block`` group.

    *expr* must already contain a ``description`` group.  attrs may contain
    "join", one of "english", "concat", "space" or "newline".
    Raises TypeError when any of those checks fails.
    """
    checked = _check_attrs(attrs, ("join",))
    _must_have(expr, description)
    join_styles = ("english", "concat", "space", "newline")
    group_attrs = {}
    _set_if_given(checked, "join", group_attrs, join_styles)
    return Group(NS + "description_block", expr, group_attrs)
_settag(description_block, NS + "description_block")

def description(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:description`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "description", expr)
_settag(description, NS + "description")

def description_line(expr, attrs={}):
    """Wrap *expr* as a description nested inside a description_block.

    *attrs* is handed to ``description``, which accepts no attributes, so a
    non-empty *attrs* raises TypeError.  The enclosing block gets defaults.
    """
    inner = description(expr, attrs)
    return description_block(inner)

152
def fast_dbxref(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:fast_dbxref`` group.

    attrs may contain "style", one of "sp-general", "sp-prosite" or
    "sp-embl".  Raises TypeError for any other attribute name or value.
    """
    checked = _check_attrs(attrs, ("style",))
    styles = ("sp-general", "sp-prosite", "sp-embl")
    group_attrs = {}
    _set_if_given(checked, "style", group_attrs, styles)
    return Group(NS + "fast_dbxref", expr, group_attrs)

158
def dbxref(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:dbxref`` group.

    *expr* must already contain a ``dbxref_dbid`` group.  "style" is the only
    recognised attribute and is not supported yet: a non-None value raises
    NotImplementedError.  Other failed checks raise TypeError.
    """
    checked = _check_attrs(attrs, ("style",))
    _must_have(expr, dbxref_dbid)
    _complain_if_given(checked, "style")
    # No attribute is ever copied, so the group gets an empty dict.
    return Group(NS + "dbxref", expr, {})
_settag(dbxref, NS + "dbxref")

def dbxref_dbname(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:dbxref_dbname`` group.

    attrs may contain "style", copied as-is when given.
    Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("style",))
    group_attrs = {}
    _set_if_given(checked, "style", group_attrs)
    return Group(NS + "dbxref_dbname", expr, group_attrs)
_settag(dbxref_dbname, NS + "dbxref_dbname")

def dbxref_dbid(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:dbxref_dbid`` group.

    Recognised attributes:
      "dbname" -- copied as-is
      "type"   -- one of "primary", "accession", "secondary"
      "style"  -- not supported yet; a non-None value raises
                  NotImplementedError
      "negate" -- 0 or 1, stored as a string via str()
    Raises TypeError for an unknown attribute name or an invalid value.
    """
    checked = _check_attrs(attrs, ("dbname", "type", "style", "negate"))
    allowed_types = ("primary", "accession", "secondary")
    group_attrs = {}
    _set_if_given(checked, "dbname", group_attrs)
    _set_if_given(checked, "type", group_attrs, allowed_types)
    _complain_if_given(checked, "style")
    _set_if_given(checked, "negate", group_attrs, (0, 1), str)

    return Group(NS + "dbxref_dbid", expr, group_attrs)
_settag(dbxref_dbid, NS + "dbxref_dbid")

def dbxref_negate(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:dbxref_negate`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "dbxref_negate", expr)
_settag(dbxref_negate, NS + "dbxref_negate")

##################### sequences

192 -def _check_gapchar(s):
193 if not ( ord(" ") <= ord(s) <= 126 ): 194 raise TypeError("%r not allowed as a gap character" % (s,)) 195 return s

# What about three letter codes?
def sequence_block(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:sequence_block`` group.

    *expr* must already contain a ``sequence`` group.  Recognised attributes:
      "alphabet"      -- one of the alphabet names listed below
      "gapchar"       -- one character, validated by _check_gapchar
      "remove_spaces" -- 0 or 1, stored as a string via str()
    Raises TypeError for an unknown attribute name or an invalid value.
    """
    checked = _check_attrs(attrs, ("alphabet", "gapchar", "remove_spaces"))
    _must_have(expr, sequence)
    alphabets = (
        "iupac-protein", "iupac-dna", "iupac-rna",
        "iupac-ambiguous-protein",
        "iupac-ambiguous-dna",
        "iupac-ambiguous-rna",
        "protein", "dna", "rna", "unknown",
    )
    group_attrs = {}
    _set_if_given(checked, "alphabet", group_attrs, alphabets)
    _set_if_given(checked, "gapchar", group_attrs, convert=_check_gapchar)
    _set_if_given(checked, "remove_spaces", group_attrs, (0, 1), str)
    return Group(NS + "sequence_block", expr, group_attrs)
_settag(sequence_block, NS + "sequence_block")

def sequence(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:sequence`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "sequence", expr)
_settag(sequence, NS + "sequence")

def alphabet(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:alphabet`` group.

    attrs may contain "alphabet", one of the names listed below.
    Raises TypeError for any other attribute name or value.
    """
    checked = _check_attrs(attrs, ("alphabet",))
    alphabets = (
        "iupac-protein", "iupac-dna", "iupac-rna",
        "iupac-ambiguous-protein",
        "iupac-ambiguous-dna",
        "iupac-ambiguous-rna",
        "protein", "dna", "rna", "nucleotide", "unknown",
    )
    group_attrs = {}
    _set_if_given(checked, "alphabet", group_attrs, alphabets)
    return Group(NS + "alphabet", expr, group_attrs)
_settag(alphabet, NS + "alphabet")



############################## features

# In PIR

# FEATURE
#    1-25                #domain signal sequence #status predicted #label SIG\
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\
#    63,209,297,365,522,
#    725                 #binding_site carbohydrate (Asn) (covalent) #status
#                        predicted

# The whole thing is a 'feature_block'

# One 'feature' is
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\

# One 'feature_name' is "binding_site".

# An example of the feature_location_block and feature_location, which I
# will abbreviate as 'flb' and 'fl', is:
#    <flb> <fl>63,209,297,365,522,</fl>
#    <fl>725</fl> #binding_site carbohydrate ...

# PIR doesn't have a 'feature_description'

# Let:
#   fq  = feature_qualifier
#   fqb = feature_qualifier_block
#   fqn = feature_qualifier_name
#   fqd = feature_qualifier_description
# then the text
#
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\
#
# can be represented as (the rather tedious)
#
#    26-737              <fqb><fq>#<fqn>product</fqn> <fqd>procollagen-\
# lysine 5-dioxygenase 2</fqd></fq> #<fq><fqn>status</fqn>
#                        <fqd>predicted</fqd> #<fq><fqn>label\
# </fqn> <fqd>MAT</fqd></fq>\</fqb>
#

# 'style' determines the namespace for the feature name
def feature_block(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_block`` group.

    attrs may contain "style" and "location-style", both copied as-is.
    *expr* must already contain a ``feature`` group.
    Raises TypeError when any of those checks fails.
    """
    checked = _check_attrs(attrs, ("style", "location-style"))
    group_attrs = {}
    for key in ("style", "location-style"):
        _set_if_given(checked, key, group_attrs)
    _must_have(expr, feature)
    return Group(NS + "feature_block", expr, group_attrs)
_settag(feature_block, NS + "feature_block")

def feature(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature`` group.

    attrs may contain "location-style", copied as-is.  *expr* must contain a
    ``feature_name`` group and either a ``feature_location`` group or both
    ``feature_location_start`` and ``feature_location_end``.
    Raises TypeError when any of those checks fails.
    """
    checked = _check_attrs(attrs, ("location-style",))
    group_attrs = {}
    _set_if_given(checked, "location-style", group_attrs)
    _must_have(expr, feature_name)
    # Either one combined location, or an explicit start/end pair.
    location_alternatives = [
        [feature_location],
        [feature_location_start, feature_location_end],
    ]
    _must_have_set(expr, location_alternatives)
    return Group(NS + "feature", expr, group_attrs)
_settag(feature, NS + "feature")

def feature_name(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_name`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "feature_name", expr)
_settag(feature_name, NS + "feature_name")

def feature_location(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_location`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "feature_location", expr)
_settag(feature_location, NS + "feature_location")

def feature_location_start(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_location_start`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "feature_location_start", expr)
_settag(feature_location_start, NS + "feature_location_start")

def feature_location_end(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_location_end`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "feature_location_end", expr)
_settag(feature_location_end, NS + "feature_location_end")

def feature_description(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_description`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "feature_description", expr)
_settag(feature_description, NS + "feature_description")


##def feature_qualifier_block(expr, attrs = {}):
##    attrs = _check_attrs(attrs, ())
##    _must_have(expr, feature_qualifier)
##    return Group(NS + "feature_qualifier_block", expr)
##_settag(feature_qualifier_block, NS + "feature_qualifier_block")

def feature_qualifier(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_qualifier`` group.

    *expr* must already contain a ``feature_qualifier_name`` group.
    No attributes are accepted.  Raises TypeError when a check fails.
    """
    _check_attrs(attrs, ())  # called only for its validation
    _must_have(expr, feature_qualifier_name)
    return Group(NS + "feature_qualifier", expr)
_settag(feature_qualifier, NS + "feature_qualifier")

def feature_qualifier_name(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_qualifier_name`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "feature_qualifier_name", expr)
_settag(feature_qualifier_name, NS + "feature_qualifier_name")

def feature_qualifier_description(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:feature_qualifier_description`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group(NS + "feature_qualifier_description", expr)
_settag(feature_qualifier_description, NS + "feature_qualifier_description")


############ For homology searches

# "BLASTN", "BLASTP"
def application_name(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:application_name`` group.

    attrs may contain "app".  The checked attribute dict is passed to the
    group unchanged, so "app" is present with value None when not supplied.
    Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("app",))
    return Group("bioformat:application_name", expr, checked)

# "2.0.11", "2.0a19MP-WashU"
def application_version(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:application_version`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:application_version", expr, checked)
357
def search_header(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:search_header`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_header", expr, checked)
361
def search_table(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:search_table`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_table", expr, checked)
365
def search_table_description(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:search_table_description`` group.

    attrs may contain "bioformat:decode"; it defaults to "strip" when not
    supplied.  Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    group_attrs = {"bioformat:decode": "strip"}  # default, overridden below
    _set_if_given(checked, "bioformat:decode", group_attrs)
    return Group("bioformat:search_table_description", expr, group_attrs)
371
def search_table_value(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:search_table_value`` group.

    attrs may contain "name" and "bioformat:decode".  The checked attribute
    dict is passed to the group unchanged, so both keys are present, with
    value None when not supplied.  Raises TypeError for any other name.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    return Group("bioformat:search_table_value", expr, checked)
375
def search_table_entry(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:search_table_entry`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_table_entry", expr, checked)
379
def query_description_block(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:query_description_block`` group.

    attrs may contain "join-query"; it defaults to "join|fixspaces" when not
    supplied.  Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("join-query",))
    group_attrs = {"join-query": "join|fixspaces"}  # default, overridden below
    _set_if_given(checked, "join-query", group_attrs)
    return Group("bioformat:query_description_block", expr, group_attrs)
385
def query_description(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:query_description`` group.

    attrs may contain "bioformat:decode", copied as-is when given.
    Raises TypeError for any other attribute name.
    """
    # The trailing comma matters: ("bioformat:decode") is just a string.
    # With a string, the name check became a substring test (so "decode" or
    # "bio" were accepted) and the checked dict gained one key per character.
    attrs = _check_attrs(attrs, ("bioformat:decode",))
    d = {}
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:query_description", expr, d)
391
def query_size(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:query_size`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    """
    _check_attrs(attrs, ())  # called only for its validation
    return Group("bioformat:query_size", expr)
395
def database_name(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:database_name`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:database_name", expr, checked)
399
def database_num_sequences(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:database_num_sequences`` group.

    attrs may contain "bioformat:decode".  The checked attribute dict is
    passed to the group unchanged, so the key is present with value None
    when not supplied.  Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    return Group("bioformat:database_num_sequences", expr, checked)
403
def database_num_letters(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:database_num_letters`` group.

    attrs may contain "bioformat:decode".  The checked attribute dict is
    passed to the group unchanged, so the key is present with value None
    when not supplied.  Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    return Group("bioformat:database_num_letters", expr, checked)
407
def hit(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hit`` group.

    attrs may contain "join-description"; it defaults to "join|fixspaces"
    when not supplied.  Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("join-description",))
    group_attrs = {"join-description": "join|fixspaces"}  # default
    _set_if_given(checked, "join-description", group_attrs)
    return Group("bioformat:hit", expr, group_attrs)
413
def hit_length(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hit_length`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hit_length", expr, checked)
417
def hit_description(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hit_description`` group.

    attrs may contain "bioformat:decode", copied as-is when given.
    Raises TypeError for any other attribute name.
    """
    # The trailing comma matters: ("bioformat:decode") is just a string.
    # With a string, the name check became a substring test (so "decode" or
    # "bio" were accepted) and the checked dict gained one key per character.
    attrs = _check_attrs(attrs, ("bioformat:decode",))
    d = {}
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:hit_description", expr, d)
423
def hsp(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp", expr, checked)
427
def hsp_value(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_value`` group.

    attrs may contain "name" and "bioformat:decode".  The checked attribute
    dict is passed to the group unchanged, so both keys are present, with
    value None when not supplied.  Raises TypeError for any other name.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    return Group("bioformat:hsp_value", expr, checked)
431
def hsp_frame(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_frame`` group.

    attrs may contain "which", one of "query", "homology" or "subject".
    Raises TypeError for any other attribute name or value.
    """
    checked = _check_attrs(attrs, ("which",))
    sides = ("query", "homology", "subject")
    group_attrs = {}
    _set_if_given(checked, "which", group_attrs, valid=sides)
    return Group("bioformat:hsp_frame", expr, group_attrs)
437
def hsp_strand(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_strand`` group.

    Recognised attributes:
      "which"  -- one of "query", "homology", "subject"
      "strand" -- one of "+1", "0", "-1", ""
    Raises TypeError for an unknown attribute name or an invalid value.
    """
    checked = _check_attrs(attrs, ("strand", "which"))
    sides = ("query", "homology", "subject")
    strands = ("+1", "0", "-1", "")
    group_attrs = {}
    # "which" is validated before "strand", as in the original ordering.
    _set_if_given(checked, "which", group_attrs, valid=sides)
    _set_if_given(checked, "strand", group_attrs, valid=strands)
    return Group("bioformat:hsp_strand", expr, group_attrs)
444
def hsp_seqalign_query_seq(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_seq`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_seq", expr, checked)
448
def hsp_seqalign_homology_seq(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_homology_seq`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_homology_seq", expr, checked)
452
def hsp_seqalign_subject_seq(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_subject_seq`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_seq", expr, checked)
456
def hsp_seqalign_query_leader(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_leader`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_leader", expr, checked)
460 461
def hsp_seqalign_query_name(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_name`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_name", expr, checked)
465
def hsp_seqalign_subject_name(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_subject_name`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_name", expr, checked)
469
def hsp_seqalign(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign", expr, checked)
473
def hsp_seqalign_query_start(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_start`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_start", expr, checked)
477
def hsp_seqalign_query_end(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_query_end`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_end", expr, checked)
481
def hsp_seqalign_subject_start(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_subject_start`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_start", expr, checked)
485
def hsp_seqalign_subject_end(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:hsp_seqalign_subject_end`` group.

    No attributes are accepted; a non-empty *attrs* raises TypeError.
    The checked (empty) attribute dict is passed to the group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_end", expr, checked)
489
def search_parameter(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:search_parameter`` group.

    attrs may contain "name" and "bioformat:decode"; each is copied onto the
    group only when given.  Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    group_attrs = {}
    for key in ("name", "bioformat:decode"):
        _set_if_given(checked, key, group_attrs)
    return Group("bioformat:search_parameter", expr, group_attrs)
496
def search_statistic(expr, attrs={}):
    """Wrap *expr* in a ``bioformat:search_statistic`` group.

    attrs may contain "name" and "bioformat:decode"; each is copied onto the
    group only when given.  Raises TypeError for any other attribute name.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    group_attrs = {}
    for key in ("name", "bioformat:decode"):
        _set_if_given(checked, key, group_attrs)
    return Group("bioformat:search_statistic", expr, group_attrs)
503