Package Bio :: Package EUtils :: Module POM
[hide private]
[frames | no frames]

Source Code for Module Bio.EUtils.POM

   1  #!/usr/bin/env python 
   2   
   3  """ 
   4  This module implements the XML POM -- the Python Object Model for XML. It is 
   5  something like DOM, but more Python-ic, and easier to use. These base classes 
   6  are used to build POM source files which are self-validating python-based XML 
   7  constructor objects. The major parts of the dtd2py command line tool are also 
   8  here. 
   9   
  10  """ 
  11   
  12  import sys, os, re, string 
  13   
  14  try: 
  15          True 
  16  except NameError: 
  17          True = 1 
  18          False = 0 
  19   
class ValidationError(ValueError):
    """Raised when building an XML POM tree that would be invalid.

    The class derives from ValueError, so handlers written for ValueError
    also catch it.
    """
#########################################################
# XML generating classes
# These classes are used to generate XML documents, similar to DOM. But, this
# interface is simpler and more Python-ic.
#########################################################

# Plain text data to be added to a GenericNode.
# This class needs to emulate much of the ElementNode interface.
class IndentedText(str):
    """Character-data node rendered with one tab per nesting level.

    The text is kept unescaped in ``self.data`` and is escaped again each
    time the node is rendered.  The remaining methods mirror the parts of
    the ElementNode interface that tree-walking code calls on children.
    """

    def __init__(self, data=""):
        self.data = unescape(unicode(data))
        self._level = 0
        self._parent = None

    def set_text(self, data):
        """Replace the stored text with the unescaped form of *data*."""
        self.data = unescape(unicode(data))

    def get_text(self):
        """Return the stored (unescaped) text."""
        return self.data

    def insert(self, data):
        """Put the unescaped form of *data* in front of the stored text."""
        prefix = unescape(unicode(data))
        self.data = prefix + self.data

    def add_text(self, data):
        """Put the unescaped form of *data* after the stored text."""
        suffix = unescape(unicode(data))
        self.data = self.data + suffix
    append = add_text

    def __str__(self):
        indent = "\t" * self._level
        return "%s%s" % (indent, escape(self.data))

    def __unicode__(self):
        indent = "\t" * self._level
        return u"%s%s" % (indent, escape(self.data))

    def __repr__(self):
        class_name = self.__class__.__name__
        return "%s(%r)" % (class_name, escape(self.data))

    def set_level(self, level):
        """Record the nesting depth used for indentation."""
        self._level = level

    def get_escape_length(self):
        """Return the length of the text once it has been escaped."""
        escaped = escape(self.data)
        return len(escaped)

    def destroy(self):
        """Drop the stored text and the reference to the parent node."""
        self.data = None
        self._parent = None

    def fullpath(self):
        """Return ``<parent path> = <repr of text>``, or just the repr."""
        if not self._parent:
            return repr(self.data)
        return "%s = %r" % (self._parent.fullpath(), self.data)

    def matchpath(self, pe):
        """Text never matches a path element."""
        return 0

    def has_children(self):
        """Text has no child nodes."""
        return 0

    def has_attributes(self):
        """Text has no attributes."""
        return 0

    def full_repr(self):
        """Return the plain repr; there is no subtree to describe."""
        return repr(self)
class Text(IndentedText):
    """Text node that renders as its escaped data with no indentation."""

    def __str__(self):
        text = self.data
        return escape(text)

    # The unicode rendering returns exactly the same escaped data.
    __unicode__ = __str__
class Comment(IndentedText):
    """XML comment node.

    Unlike IndentedText, the text is stored exactly as given: no entity
    unescaping on the way in and no escaping on the way out.
    """

    def __init__(self, data=""):
        self._level = 0
        self._parent = None
        self.data = unicode(data)

    def __str__(self):
        indent = "\t" * self._level
        return "%s<!-- %s -->" % (indent, self._fix(self.data))

    def __unicode__(self):
        indent = "\t" * self._level
        return u"%s<!-- %s -->" % (indent, self._fix(self.data))

    def set_text(self, data):
        """Replace the comment text."""
        self.data = unicode(data)

    def get_text(self):
        """Return the comment text."""
        return self.data

    def insert(self, data):
        """Put *data* in front of the comment text."""
        self.data = unicode(data) + self.data

    def add_text(self, data):
        """Put *data* after the comment text."""
        self.data = self.data + unicode(data)
    append = add_text

    def _fix(self, data):
        """Return *data* with every "--" pair rewritten as "- "."""
        if u"--" in data:
            data = data.replace(u"--", u"- ")
        return data
# Abstract base class for generic XML node generation.
# Create an XML node by subclassing this and defining allowed attribute names
# in ATTLIST. CONTENTMODEL holds the content specification from the DTD.
# The name of the subclass should exactly match the name of the XML element.
class ElementNode:
    """Base class for XML element nodes.

    Subclasses set ATTLIST (a sequence of objects with a ``name``, an
    ``a_decl`` and a ``verify`` method) and CONTENTMODEL.  Values of
    declared XML attributes live in ``self._attribs``; everything else
    lives in the instance ``__dict__``.  Names found in neither place are
    "acquired" from the parent chain and finally from ``_acquired``.
    """
    ATTLIST = None
    CONTENTMODEL = None
    _acquired = {"_indented": 1, "_namespace": None}  # default acquired values
    # Splits "name[predicate]" into the name and the bracketed predicate.
    _XPATH_RE = re.compile(r'(\w*)(\[.*])')

    def __init__(self, **attribs):
        """Create a node; every keyword must name an attribute in ATTLIST.

        Raises ValidationError for a keyword that is not declared.
        """
        self._attribs = {}
        for key, value in attribs.items():
            if self._validate_attribute(key):
                self._attribs[key] = value
            else:
                raise ValidationError("invalid attribute name for this element")
        self._children = []
        self._parent = None
        self._level = 0
        self._inline = 0
        # _indented and _namespace are not set here: they may be acquired.
        # You can force element names to a particular case, regardless of
        # subclass name. This is sometimes needed to overcome clashes with
        # Python keyword names.
        self._name = self.__class__.__name__

    def _validate_attribute(self, name):
        """Return True if *name* is declared in ATTLIST for this element."""
        if self.ATTLIST:
            for xmlattr in self.ATTLIST:
                if name == xmlattr.name:
                    return True
        return False

    def _verify_attributes(self):
        """Check the stored attribute values against ATTLIST.

        Raises ValidationError when an attribute declared REQUIRED has no
        value; values that are present are handed to the declaration's
        ``verify`` method.
        """
        if not self.ATTLIST:
            return None
        for attr in self.ATTLIST:
            aval = self._attribs.get(attr.name, None)
            if aval is None:
                if attr.a_decl == REQUIRED:
                    raise ValidationError(
                        "required attribute not present: " + attr.name)
            else:
                attr.verify(aval)

    def get_parent(self):
        """Return the parent node, or None for a detached node."""
        return self._parent

    def reparent(self, newparent):
        """Remove this node from its current parent and append it to *newparent*."""
        if self._parent:
            i = self._parent.index(self)
            del self._parent[i]
        newparent.append(self)

    def detach(self):
        """Forget the parent and reset the level (the parent is not updated)."""
        self._parent = None
        self._level = 0

    def destroy(self):
        """destroy() Remove this node and all child node references."""
        # remove parent _children list reference
        if self._parent:
            i = self._parent.index(self)
            del self._parent[i]
        self._parent = None
        for n in self._children:
            n.detach()
        self._children = None

    def set_level(self, level):
        """Set the nesting depth used for indentation."""
        self._level = int(level)

    def set_inline(self, tf=1):
        """Render the element on one line (no newlines between parts)."""
        self._inline = not not tf  # force to boolean

    def set_indented(self, tf=1):
        """Turn tab indentation on or off for this node and its descendants."""
        self._indented = not not tf  # force to boolean

    def inherit_indent(self):
        "clears indentation flag so that it may be acquired from parent."
        try:
            del self.__dict__["_indented"]
        except KeyError:
            pass

    def set_namespace(self, ns):
        """Set the namespace prefix written in front of the element name."""
        self._namespace = ns

    # some ugly stuff for case-insensitive XHTML
    def use_lowercase(self):
        """Write the element name in lower case."""
        self._name = self.__class__.__name__.lower()

    def use_uppercase(self):
        """Write the element name in upper case."""
        self._name = self.__class__.__name__.upper()

    def use_truecase(self):
        """Write the element name exactly as the class is named."""
        self._name = self.__class__.__name__

    def index(self, obj):
        """Return the position of *obj* among the children (by identity).

        Raises ValueError when *obj* is not a child.
        """
        for i, child in enumerate(self._children):
            if child is obj:
                return i
        raise ValueError("ElementNode: Object not contained here.")

    def append(self, obj):
        """Add *obj* as the last child and make this node its parent."""
        obj.set_level(self._level + 1)
        obj._parent = self
        self._children.append(obj)

    def extend(self, objlist):
        """Append every object in *objlist*."""
        for obj in objlist:
            self.append(obj)

    def insert(self, index, obj):
        """Insert *obj* before position *index* and make this node its parent."""
        obj.set_level(self._level + 1)
        obj._parent = self
        self._children.insert(index, obj)

    def add(self, klass, **kwargs):
        """Instantiate *klass* with *kwargs*, append it, and return it."""
        obj = klass(**kwargs)
        self.append(obj)
        return obj

    def get_children(self):
        """Return a copy of the child list."""
        return self._children[:]

    def __iter__(self):
        return iter(self._children)

    def add_text(self, text):
        "Adding text to elements is so common, there is a special method for it."
        if self.has_children() and isinstance(self._children[-1], IndentedText):
            self._children[-1].add_text(text)
        else:
            # append() assigns the level, so no separate set_level is needed.
            self.append(Text(text))

    def replace_text(self, text):
        """Drop the last child (whatever its type) and append *text* as Text."""
        if self._children:
            del self._children[-1]
        self.append(Text(text))

    def __len__(self):
        return len(self._children)

    # The truth is, we exist.  Without these, a node with no children would
    # be false because of __len__.
    def __nonzero__(self):
        return 1

    def __bool__(self):
        # Python 3 spelling of __nonzero__; it must return a real bool.
        return True

    def hasAttributes(self):
        """Return the number of attribute values that are set."""
        return len(self._attribs)
    has_attributes = hasAttributes

    def has_attribute(self, name):
        """Return 1 if a value is stored for attribute *name*, else 0."""
        if name in self._attribs:
            return 1
        return 0

    def attributes(self):
        """Return the attribute names declared in ATTLIST ([] when none)."""
        if not self.ATTLIST:
            return []
        return [attr.name for attr in self.ATTLIST]

    def has_children(self):
        """Return the number of children."""
        return len(self._children)

    def set_attribute(self, name, val):
        """set_attribute(name, value)
        This exists to set attributes that have names with illegal Python
        identifier characters.  Names not declared in ATTLIST are ignored.

        """
        if self._validate_attribute(name):
            self._attribs[name] = val

    def get_attribute(self, name):
        """get_attribute(name)
        This exists to get attributes that have names with illegal Python
        identifier characters.  Raises KeyError when no value is stored.

        """
        return self._attribs[name]

    def __setattr__(self, name, value):
        # Declared XML attributes go to _attribs, anything else is ordinary
        # instance state.
        if self._validate_attribute(name):
            self._attribs[name] = value
        else:
            self.__dict__[name] = value

    # this plus the _parent and _acquired attributes implement "acquisition",
    # or run-time inheritance.
    def __getattr__(self, name):
        # State is read through __dict__: normal attribute access here would
        # re-enter __getattr__ and recurse without bound on an instance whose
        # __init__ has not run.
        attribs = self.__dict__.get("_attribs")
        if attribs is not None:
            try:
                return attribs[name]
            except KeyError:
                pass
        try:
            return self._acquire(name)
        except AttributeError:
            pass
        raise AttributeError("AttributeError: %s has no attribute '%s'" % (
            self.__dict__.get("_name", self.__class__.__name__), name))

    def _acquire(self, name):
        """Look *name* up the parent chain, then in the class defaults.

        Raises AttributeError when the name is found nowhere.
        """
        parent = self.__dict__.get("_parent")
        if parent is not None:
            try:
                return parent.__dict__[name]
            except KeyError:
                return parent._acquire(name)
        try:
            return self._acquired[name]
        except KeyError:
            raise AttributeError(name)

    def __delattr__(self, name):
        # Only XML attribute values can be deleted this way; a missing name
        # raises KeyError.
        del self._attribs[name]

    def _find_index(self, index):
        """Turn a path-element string into the index of the first match.

        Non-string indexes are returned unchanged.  Raises IndexError when
        no child matches.
        """
        if isinstance(index, str):
            for i in xrange(len(self._children)):
                if self._children[i].matchpath(index):
                    return i
            raise IndexError("no elements match")
        return index

    def __getitem__(self, index):
        if isinstance(index, str):
            el = self.get_element(index)
            if el is None:
                raise IndexError("no item matches")
            return el
        return self._children[index]

    def get(self, index, default=None):
        """Like ``self[index]``; *default* applies to string lookups only."""
        if isinstance(index, str):
            el = self.get_element(index)
            if el is None:
                return default
            return el
        return self._children[index]

    def has_key(self, index):
        """Return True if a child matches the path element *index*.

        Raises TypeError when *index* is not a string.
        """
        if isinstance(index, str):
            return self.get_element(index) is not None
        raise TypeError("Can only use has_key on a string")

    def __setitem__(self, index, obj):
        index = self._find_index(index)
        obj.set_level(self._level + 1)
        obj._parent = self
        self._children[index] = obj

    def __delitem__(self, index):
        index = self._find_index(index)
        # The removed child is not destroyed; it keeps its own subtree.
        del self._children[index]

    def __repr__(self):
        attrs = ['%s=%r' % item for item in self._attribs.items()]
        return "%s(%s)" % (self.__class__, ", ".join(attrs))

    def __str__(self):
        self._verify_attributes()
        if not self.CONTENTMODEL or self.CONTENTMODEL.is_empty():
            return self._empty_str()
        return self._non_empty_str()

    def __unicode__(self):
        self._verify_attributes()
        if not self.CONTENTMODEL or self.CONTENTMODEL.is_empty():
            return self._empty_unistr()
        return self._non_empty_unistr()

    def full_repr(self):
        """Return Python statements (one per line) that rebuild this subtree.

        The statements use variables named ``n<level>``.
        """
        s = ["n%d = %r" % (self._level, self)]
        s.append("n%d.set_level(%d)" % (self._level, self._level + 1))
        for c in self._children:
            if not c.has_children():
                s.append("n%d.append(%r)" % (self._level, c))
            else:
                s.append(c.full_repr())
                s.append("n%d.append(n%d)" % (self._level, self._level + 1))
                s.append("del n%d" % (self._level + 1))
        return "\n".join(s)

    def _tabs(self):
        # _indented is 0/1 (or False/True), so it switches indentation off.
        return "\t" * (self._level * self._indented)

    def _get_ns(self):
        return IF(self._namespace, "%s:" % self._namespace, "")

    def _non_empty_str(self):
        s = ["%s<%s%s%s>" % (self._tabs(), self._get_ns(), self._name,
                             self._attr_str())]
        s.extend([str(child) for child in self._children])
        s.append("%s</%s%s>" % (IF(self._inline, "", self._tabs()),
                                self._get_ns(), self._name))
        if self._inline:
            return "".join(s)
        return "\n".join(s)

    def _empty_str(self):
        return "%s<%s%s%s />" % (self._tabs(), self._get_ns(), self._name,
                                 self._attr_str())

    def _attr_str(self):
        # Attributes whose value is None are left out.
        return "".join([' %s="%s"' % (key, escape(str(val)))
                        for key, val in self._attribs.items()
                        if val is not None])

    def _non_empty_unistr(self):
        s = [u"%s<%s%s%s>" % (self._tabs(), self._get_ns(), self._name,
                              self._attr_unistr())]
        s.extend([unicode(child) for child in self._children])
        s.append(u"%s</%s%s>" % (IF(self._inline, "", self._tabs()),
                                 self._get_ns(), self._name))
        if self._inline:
            return u"".join(s)
        return u"\n".join(s)

    def _empty_unistr(self):
        return u"%s<%s%s%s />" % (self._tabs(), self._get_ns(), self._name,
                                  self._attr_unistr())

    def _attr_unistr(self):
        # Attributes whose value is None are left out.
        return u"".join([u' %s="%s"' % (key, escape(unicode(val)))
                         for key, val in self._attribs.items()
                         if val is not None])

    # methods for node path manipulation
    def pathname(self):
        """pathname() returns the ElementNode as a string in xpath format."""
        if self._attribs:
            s = ["@%s='%s'" % (key, val) for key, val in self._attribs.items()]
            return "%s[%s]" % (self.__class__.__name__, " and ".join(s))
        return self.__class__.__name__

    def fullpath(self):
        """fullpath() returns the ElementNode's full path as a string in xpath format."""
        if self._parent:
            base = self._parent.fullpath()
        else:
            base = ""
        return "%s/%s" % (base, self.pathname())

    def matchpath(self, pathelement):
        """Return a true value if this node matches *pathelement*.

        *pathelement* is either a bare element name or ``name[predicate]``.
        Raises ValueError when a bracketed element cannot be parsed.
        """
        if "[" not in pathelement:
            return pathelement == self._name
        mo = self._XPATH_RE.match(pathelement)
        if not mo:
            raise ValueError("ivalid path element")
        name, match = mo.groups()
        match = match.replace("@", "self.")
        match = match.replace("=", "==")
        # SECURITY: the predicate is evaluated with eval(), so a path that
        # comes from an untrusted source can run arbitrary code.  Only pass
        # trusted paths.  The blanket "=" rewrite also means only plain
        # equality tests are supported (">=" and "!=" get mangled).
        return (name == self._name and eval(match[1:-1]))

    def find_elements(self, pathelement):
        """Return every direct child matching *pathelement*."""
        return [child for child in self._children
                if child.matchpath(pathelement)]

    def get_element(self, pathelement):
        """Return the first direct child matching *pathelement*, or None."""
        for child in self._children:
            if child.matchpath(pathelement):
                return child
        return None

    def _find_node(self, eltype, collect=None):
        """Collect every descendant that is an instance of *eltype*.

        Matches found inside a child are listed before the child itself.
        """
        if collect is None:
            collection = []
        else:
            collection = collect  # should be a list
        for el in self._children:
            if el.has_children():
                el._find_node(eltype, collection)
            if isinstance(el, eltype):
                collection.append(el)
        return collection

    def find(self, elclass, **attribs):
        """Return the first direct child of type *elclass* whose attributes
        equal *attribs*, or None."""
        for obj in self._children:
            if isinstance(obj, elclass):
                if self._attribs_match(obj, attribs):
                    return obj
        return None

    def getall(self, elclass, depth=0, collect=None):
        """Return the children of type *elclass*, descending *depth* levels."""
        if collect is None:
            rv = []
        else:
            rv = collect  # should be a list
        for el in self._children:
            if isinstance(el, elclass):
                rv.append(el)
            # Text children have no getall(); a childless node has nothing
            # to contribute either, so only descend where there are children.
            if depth > 0 and el.has_children():
                el.getall(elclass, depth - 1, rv)
        return rv

    def _attribs_match(self, obj, attribdict):
        """Return 1 if *obj* has every attribute in *attribdict* with an
        equal value, else 0."""
        for tname, tval in attribdict.items():
            try:
                if getattr(obj, tname) != tval:
                    return 0
            except AttributeError:
                return 0
        return 1

    def tostring(self):
        """Return the concatenated text of every text node in the subtree."""
        return "".join([x.get_text() for x in self.text()])

    # XPath-like functions
    def comment(self):
        """Return every Comment in the subtree."""
        return self._find_node(Comment)

    def text(self):
        """Return every IndentedText (including subclasses) in the subtree."""
        return self._find_node(IndentedText)

    def processing_instruction(self):
        # NOTE(review): no ProcessingInstruction class is visible in this
        # module, so this call looks like it raises NameError -- confirm
        # where the name is meant to come from.
        return self._find_node(ProcessingInstruction)

    def node(self):
        """Return every ElementNode in the subtree."""
        return self._find_node(ElementNode)
class Fragments(ElementNode):
    """Fragments is a special holder class to hold 'loose' markup fragments.
    That is, bits of markup that don't have a common container. It is invisible:
    rendering produces only the children, never a tag for the holder itself."""

    def __str__(self):
        parts = [str(child) for child in self._children]
        if self._inline:
            return "".join(parts)
        return "\n".join(parts)

    def __unicode__(self):
        # Children must be converted with unicode(), not str(): str() would
        # force each child through an ASCII encode before the join.
        parts = [unicode(child) for child in self._children]
        if self._inline:
            return u"".join(parts)
        return u"\n".join(parts)
# Base class for whole POM documents, including the header.
class POMDocument:
    """A whole document: the XML declaration plus a root node.

    ``root`` is filled in either directly or by parsing; ``dirty`` is a
    flag callers may set and that writing/parsing resets to 0.
    """
    HEADER = '<?xml version="1.0" encoding="iso-8859-1"?>\n'
    # Set by get_document(); the class-level default lets write_xmlfile()
    # be called on a document that was never loaded from a file.
    filename = None

    def __init__(self, dtd=None):
        self.dtd = dtd
        self.root = None
        self.parser = None
        self.dirty = 0

    def __str__(self):
        return self.HEADER + str(self.root) + "\n"

    def __unicode__(self):
        return self.HEADER + unicode(self.root) + "\n"

    def set_dirty(self, val=1):
        """Set the dirty flag."""
        self.dirty = val

    def get_parser(self, handlerclass=None, module=None):
        """Create, remember and return a parser that fills in ``self.root``.

        *module* defaults to the dtd given to the constructor.
        """
        mod = module or self.dtd
        self.parser = get_parser(handlerclass, self._callback, mod)
        return self.parser

    def del_parser(self):
        """Forget the current parser."""
        self.parser = None

    def _callback(self, doc):
        # Called by the parser handler with the finished top-level node.
        self.root = doc
        self.dirty = 0

    def parse(self, url, handlerclass=None, module=None):
        """Parse *url* into ``self.root`` and discard the parser afterwards."""
        mod = module or self.dtd
        if not self.parser:
            self.get_parser(handlerclass, mod)
        self.parser.parse(url)
        self.del_parser()

    def parseFile(self, fo, handlerclass=None, module=None):
        """Like parse(), but calls the parser's ``parseFile`` with *fo*."""
        mod = module or self.dtd
        if not self.parser:
            self.get_parser(handlerclass, mod)
        self.parser.parseFile(fo)
        self.del_parser()

    def write_xmlfile(self, filename=None):
        """Write the document to *filename* (default: ``self.filename``).

        Nothing is written when no file name is known.
        """
        filename = filename or self.filename
        if filename:
            fo = open(os.path.expanduser(filename), "w")
            try:
                fo.write(str(self))
            finally:
                fo.close()
        # NOTE(review): the flag is cleared even when nothing was written
        # because no file name was available -- confirm this is intended.
        self.dirty = 0
    writefile = write_xmlfile

    def writefileobject(self, fo):
        """Write the document to the open file object *fo*."""
        fo.write(str(self))

    def get_document(self, filename, dtdmodule):
        """Parse *filename* using *dtdmodule* and remember the file name."""
        self.get_parser(module=dtdmodule)
        self.parse(filename)
        self.filename = filename

    def getnode(self, path):
        """getnode(path) Returns an ElementNode addressed by the path.

        Raises IndexError when any element of the path cannot be found.
        """
        elements = path.split("/")
        while elements and not elements[0]:  # eat empty leading elements
            elements.pop(0)
        if not elements:
            raise IndexError("first path element not found")
        node = self.root
        if not node.matchpath(elements.pop(0)):
            raise IndexError("first path element not found")
        while elements:
            node = node.get_element(elements.pop(0))
            if node is None:
                raise IndexError("path element not found")
        return node

    def setnode(self, path, text):
        """Replace the text of the node at *path*."""
        node = self.getnode(path)
        node.replace_text(text)

    def delnode(self, path):
        """Delete the node addressed by *path* from its parent."""
        els = path.split("/")
        path, endnode = "/".join(els[:-1]), els[-1]
        node = self.getnode(path)
        del node[endnode]

    def addnode(self, basepath, newnode):
        """Append *newnode* to the node at *basepath*."""
        node = self.getnode(basepath)
        node.append(newnode)

    def add_text(self, basepath, text):
        """Add *text* to the node at *basepath*."""
        node = self.getnode(basepath)
        node.add_text(text)

    def _write_text(self, fo, node):
        # One line per text node: its full path followed by its text.
        for n in node:
            if isinstance(n, IndentedText):
                fo.write(n.fullpath())
                fo.write("\n")
            else:
                self._write_text(fo, n)

    def write_repr(self, fo):
        """Write the root's full_repr() to *fo* (a file object or a file name)."""
        if isinstance(fo, str):
            # We opened it, so we close it -- also when writing fails.
            fo = open(fo, "w")
            try:
                fo.write(self.root.full_repr())
            finally:
                fo.close()
        else:
            fo.write(self.root.full_repr())

    def read_repr(self, filename, localdict=None):
        """Rebuild ``self.root`` by executing a file written by write_repr().

        SECURITY: the file is executed as Python code; only read files
        from a trusted source.
        """
        if localdict is None:
            localdict = {}
        execfile(filename, globals(), localdict)
        self.root = localdict["n0"]

    def write_paths(self, fileobject):
        """Write one "path = text" line per text node to *fileobject*
        (a file object or a file name)."""
        if isinstance(fileobject, str):
            # We opened it, so we close it -- also when writing fails.
            fileobject = open(fileobject, "w")
            try:
                self._write_text(fileobject, self.root)
            finally:
                fileobject.close()
        else:
            self._write_text(fileobject, self.root)
# Parses XML files into a POM object model. A callback function is then called
# with this object model as a parameter.
class ObjectParserHandler:
    """SAX content handler that builds a tree of POM objects.

    Element classes are looked up by tag name in ``self.modules``.  When
    the document ends, *callback* is called with the top-level object.
    """

    def __init__(self, callback, module=None):
        """*module* may be one module or a list/tuple of modules."""
        self.stack = []
        self.msg = None
        self.callback = callback  # gets called when message fully parsed. The
                                  # argument is the toplevel message object.
        self.modules = []
        if module is not None:
            if isinstance(module, (list, tuple)):
                self.modules.extend(module)
            else:
                self.modules.append(module)

    def add_module(self, module):
        """Add another module to search for element classes."""
        self.modules.append(module)

    def _get_class(self, name):
        """Return the first true attribute called *name* found in the modules.

        Raises AttributeError when no module provides one.
        """
        for mod in self.modules:
            try:
                klass = getattr(mod, name)
            except AttributeError:
                continue
            if klass:
                return klass
        raise AttributeError(name)

    def startDocument(self):
        self.stack = []

    def endDocument(self):
        if self.stack:  # stack should be empty now
            raise ValidationError("unbalanced document!")
        self.callback(self.msg)
        self.msg = None

    def startElement(self, name, atts):
        "Handle an event for the beginning of an element."
        try:
            klass = self._get_class(name)
        except AttributeError:
            raise ValidationError("Undefined element tag: " + name)
        # atts is an instance with unicode keys; they must be converted to
        # str before they can be used as keyword arguments.
        attr = {}
        for key, value in atts.items():
            attr[str(key)] = unescape(str(value))
        obj = klass(**attr)
        obj.set_level(len(self.stack))
        self.stack.append(obj)

    def endElement(self, name):
        "Handle an event for the end of an element."
        obj = self.stack.pop()
        if self.stack:
            self.stack[-1].append(obj)
        else:
            self.msg = obj

    def characters(self, text):
        # Whitespace-only chunks are dropped; other text is stripped.
        if self.stack:
            text = text.strip()
            if text:
                self.stack[-1].append(Text(text))

    def ignorableWhitespace(self, ch, start=None, length=None):
        # start/length are optional so that both the three-argument form
        # and the single-argument ContentHandler form can be used.
        pass

    def processingInstruction(self, target, data):
        "Handle a processing instruction event."
        sys.stdout.write(
            "unhandled processing instruction: %s %s\n" % (target, data))

    def setDocumentLocator(self, locator):
        "Receive an object for locating the origin of SAX document events."
        pass
786 -def _default_parser_callback(obj):
787 print obj
788
def get_parser(handlerclass=None, callback=None, module=None):
    """Return an xml.sax parser wired to a freshly built content handler.

    Each argument that is false falls back to a default:
    ObjectParserHandler, the printing callback, and this module itself.
    The handler is constructed as ``handlerclass(callback, module)``.
    """
    import xml.sax
    if not handlerclass:
        handlerclass = ObjectParserHandler
    if not callback:
        callback = _default_parser_callback
    if not module:
        module = sys.modules[__name__]
    content_handler = handlerclass(callback, module)
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(content_handler)
    return sax_parser
#from xml.parsers.xmlproc.xmlapp import DTDConsumer
def get_dtd_compiler(fo, mixinmodule=None, toupper=0):
    """Return a DTDParser whose consumer generates Python source.

    The source generator is obtained from ``sourcegen.get_sourcefile(fo)``;
    *mixinmodule* and *toupper* are passed on to
    DTDConsumerForSourceGeneration.  ``sourcegen`` is imported into the
    module globals as a side effect.
    """
    global sourcegen
    import sourcegen
    from xml.parsers.xmlproc.dtdparser import DTDParser
    consumer = DTDConsumerForSourceGeneration(
        sourcegen.get_sourcefile(fo), mixinmodule, toupper)
    dtd_parser = DTDParser()
    dtd_parser.set_dtd_consumer(consumer)
    return dtd_parser
# XML helper classes, used in both generation and operation.
# They are instantiated during compilation to generate themselves.
# Then, when imported by the user from the dtds package, are used normally.
class ContentModel:
    """Holder for an element's raw content model.

    No validation is performed here; ``is_empty`` only reports whether the
    stored model is false.
    """

    def __init__(self, rawmodel=None):
        self.model = rawmodel  # XXX

    def __repr__(self):
        klass = self.__class__
        return "%s(%r)" % (klass, self.model)

    def is_empty(self):
        """Return True when no (or an empty) model is stored."""
        if self.model:
            return False
        return True
829 -class _ContentModelGenerator:
830 """_ContentModelGenerator(rawmodel) 831 The DTD parser generated and final content model are so different that a 832 different content model generator is used for this object. 833 834 """
835 - def __init__(self, rawmodel=None):
836 tm_type = type(rawmodel) 837 if tm_type is str: 838 if rawmodel == "EMPTY": 839 self.model = EMPTY 840 elif rawmodel == "#PCDATA": 841 self.model = PCDATA 842 elif rawmodel == "ANY": 843 self.model = ANY 844 else: 845 raise ValidationError, "ContentModelGenerator: unknown special type" 846 elif tm_type is tuple: 847 self.model = rawmodel # XXX 848 elif tm_type is type(None): 849 self.model = None 850 else: 851 raise RuntimeError, "unknown content model format"
852
853 - def __repr__(self):
854 return "%s(%r)" % (ContentModel, self.model)
855 856
class Enumeration(list):
    """List type used to hold the values of an enumerated attribute."""
# XXX
class AttributeList(list):
    """List of attribute declarations or values.

    The items are stored in the list itself.  ``data`` is kept as a
    read-only alias of the list so that code written against a
    ``.data`` attribute keeps working.
    """

    def _get_data(self):
        return self
    data = property(_get_data)

    def __repr__(self):
        # list.__repr__ is called explicitly: formatting self with %r here
        # would call this method again.
        return "%s(%s)" % (self.__class__, list.__repr__(self))

    def __str__(self):
        return " ".join([str(item) for item in self])

    def __unicode__(self):
        return u" ".join([unicode(item) for item in self])
869 -class _AttributeType(str):
870 - def __repr__(self):
871 return "%s('%s')" % (self.__class__.__name__, self)
872
class IDREFS(AttributeList):
    """Attribute list whose entries are IDREF values."""

    def add_ref(self, value):
        """Wrap *value* in IDREF and append it to the list."""
        # Append to the list itself; AttributeList is a list subclass and
        # has no separate backing store.
        self.append(IDREF(value))
class ENTITIES(AttributeList):
    """List class registered for the ENTITIES attribute type."""


class NMTOKENS(AttributeList):
    """List class registered for the NMTOKENS attribute type."""
class CDATA(_AttributeType):
    """String class registered for the CDATA attribute type."""


class ID(_AttributeType):
    """String class registered for the ID attribute type."""


class IDREF(_AttributeType):
    """String class registered for the IDREF attribute type."""


class NMTOKEN(_AttributeType):
    """String class registered for the NMTOKEN attribute type."""


class ENTITY(_AttributeType):
    """String class registered for the ENTITY attribute type."""
# Special content models.
PCDATA = Text
ANY = True
EMPTY = None

# enumerations
# Attribute type codes, numbered 1 through 8.
(AT_CDATA, AT_ID, AT_IDREF, AT_IDREFS,
 AT_ENTITY, AT_ENTITIES, AT_NMTOKEN, AT_NMTOKENS) = range(1, 9)

# Attribute default-declaration codes, numbered 11 through 14.
REQUIRED, IMPLIED, DEFAULT, FIXED = range(11, 15)

# DTD attribute type name -> type code.
_ATTRTYPEMAP = {
    "CDATA": AT_CDATA,
    "ID": AT_ID,
    "IDREF": AT_IDREF,
    "IDREFS": AT_IDREFS,
    "ENTITY": AT_ENTITY,
    "ENTITIES": AT_ENTITIES,
    "NMTOKEN": AT_NMTOKEN,
    "NMTOKENS": AT_NMTOKENS,
}

# Type code -> class used for values of that type.
_ATTRCLASSMAP = {
    AT_CDATA: CDATA,
    AT_ID: ID,
    AT_IDREF: IDREF,
    AT_IDREFS: IDREFS,
    AT_ENTITY: ENTITY,
    AT_ENTITIES: ENTITIES,
    AT_NMTOKEN: NMTOKEN,
    AT_NMTOKENS: NMTOKENS,
}

# DTD default-declaration keyword -> declaration code.
_DEFAULTMAP = {
    u'#REQUIRED': REQUIRED,
    u'#IMPLIED': IMPLIED,
    u'#DEFAULT': DEFAULT,
    u'#FIXED': FIXED,
}
class XMLAttribute:
    """Declaration of one XML attribute: name, type, declaration, default.

    Instances are created both while parsing a DTD (type given as a
    unicode name or a list of allowed values) and from generated source
    (type given as an int code).
    """

    def __init__(self, name, a_type, a_decl, default=None):
        self.name = str(name)
        a_type_type = type(a_type)
        if a_type_type is unicode:  # from the parser
            self.a_type = _ATTRTYPEMAP.get(str(a_type), a_type)
        elif a_type_type is int:  # from the generated file
            self.a_type = _ATTRCLASSMAP.get(a_type, a_type)
        elif a_type_type is list:
            # A list of allowed values becomes an Enumeration.
            self.a_type = Enumeration([str(value) for value in a_type])
        else:
            self.a_type = a_type
        # declaration
        # convert string to int value when generating, just use the int when using.
        self.a_decl = _DEFAULTMAP.get(a_decl, a_decl)
        self.default = default
        # save the type to speed verify
        self.a_type_type = type(self.a_type)

    def __repr__(self):
        return "%s(%r, %r, %r, %r)" % (self.__class__, self.name, self.a_type,
                                       self.a_decl, self.default)

    def verify(self, value):
        """Check *value* against an enumerated type.

        Raises ValidationError when the type is a list of allowed values
        and *value* is not one of them.  Other types are not checked.
        """
        # isinstance, not an exact type test: __init__ stores enumerations
        # as Enumeration, which is a list subclass.
        if isinstance(self.a_type, list):
            if value not in self.a_type:
                raise ValidationError(
                    "Enumeration has wrong value. %s is not one of %r." % (
                        value, self.a_type))
# This DTD parser consumer generates the Python source code from the DTD.
class DTDConsumerForSourceGeneration:
    """DTD parser consumer that feeds a source generator.

    For each element declaration a class is added to *generator*; each
    attribute declaration is appended to that class's ATTLIST.  Progress
    and "unhandled" notices are written to standard output.
    """

    def __init__(self, generator, mixins=None, toupper=0):
        self.generator = generator
        self.elements = {}
        self.parameter_entities = {}
        self.general_entities = {}
        self.toupper = toupper  # should element names be converted to all caps?
        self.mixins = mixins  # should be a module object

    def dtd_start(self):
        # No newline: dtd_end() finishes this line.
        sys.stdout.write("Starting to parse DTD... ")
        self.generator.add_comment("This file generated by a program. do not edit.")
        self.generator.add_import(sys.modules[__name__])
        if self.mixins:
            self.generator.add_import(self.mixins)

    def dtd_end(self):
        sys.stdout.write("done parsing. Writing file.\n")
        self.generator.write()

    def new_element_type(self, elem_name, elem_cont):
        "Receives the declaration of an element type."
        if elem_name in self.elements:
            # Already declared: the first declaration is kept.
            return
        parents = [ElementNode]
        mixinname = "%sMixin" % (elem_name,)
        if self.mixins and hasattr(self.mixins, mixinname):
            # A mixin named <element>Mixin goes in front of ElementNode.
            parents.insert(0, getattr(self.mixins, mixinname))
        ch = self.generator.add_class(
            IF(self.toupper, elem_name.upper(), elem_name), tuple(parents))
        ch.add_attribute("CONTENTMODEL", _ContentModelGenerator(elem_cont))
        self.elements[elem_name] = ch

    def new_attribute(self, elem, attr, a_type, a_decl, a_def):
        "Receives the declaration of a new attribute."
        try:
            element = self.elements[elem]
        except KeyError:
            raise ValidationError("attribute defined before element!")
        try:
            attlist = element.get_attribute("ATTLIST")
        except KeyError:
            # First attribute for this element: create its ATTLIST.
            element.add_attribute("ATTLIST", AttributeList())
            attlist = element.get_attribute("ATTLIST")
        attlist.append(XMLAttribute(attr, a_type, a_decl, a_def))

    def handle_comment(self, contents):
        "Receives the contents of a comment."
        self.generator.add_comment(contents)

    def new_parameter_entity(self, name, val):
        "Receives internal parameter entity declarations."
        # these are handled internally by the DTD parser. but.. save it anyway.
        self.parameter_entities[name] = val

    def new_external_pe(self, name, pubid, sysid):
        "Receives external parameter entity declarations."
        # these are handled internally by the DTD parser.

    def new_general_entity(self, name, val):
        "Receives internal general entity declarations."
        self.general_entities[name] = val
        # XXX do we need to handle this?

    def new_external_entity(self, ent_name, pub_id, sys_id, ndata):
        """Receives external general entity declarations. 'ndata' is the
        empty string if the entity is parsed."""
        # XXX do we need to handle this?
        sys.stdout.write("XXX external entity:\n")
        sys.stdout.write("%s %s %s %s\n" % (ent_name, pub_id, sys_id, ndata))

    def new_notation(self, name, pubid, sysid):
        "Receives notation declarations."
        # XXX do we need to handle this?
        sys.stdout.write("XXX unhandled notation: %s %s %s\n" % (
            name, pubid, sysid))

    def handle_pi(self, target, data):
        "Receives the target and data of processing instructions."
        # XXX do we need to handle this?
        sys.stdout.write("XXX unhandled PI: %s %s\n" % (target, data))
#########################################################
# Utility functions
#########################################################

def IF(test, tv, fv=None):
    """Return *tv* when *test* is true and *fv* otherwise.

    Both alternatives are ordinary arguments, so both are evaluated
    before the call.
    """
    if not test:
        return fv
    return tv
def get_mod_file(sourcefilename):
    """get_mod_file(sourcefilename)
    Map a source file name to the destination of its generated Python
    file inside the dtds package: the directory and extension are
    dropped, "-" and "." become "_", and ".py" is added.
    """
    import DTDs as dtds
    basename = os.path.basename(sourcefilename)
    modname, _ext = os.path.splitext(basename)
    safe_name = modname.translate(string.maketrans("-.", "__"))
    return os.path.join(dtds.__path__[0], safe_name + ".py")
def _find_element(elname, modules):
    """Return attribute *elname* from the first module that has it, else None."""
    for mod in modules:
        try:
            return getattr(mod, elname)
        except AttributeError:
            continue
    return None


# Splits "name[predicate]" into the name and the bracketed predicate.
_NODE_PATH_RE = re.compile(r'(\w*)(\[.*])')
# One @name='value' (or @name="value") test inside a predicate.
_ATTR_PREDICATE_RE = re.compile(r"""@([\w:.-]+)\s*=\s*(['"])(.*?)\2""")


def _construct_node(name, modules):
    """Instantiate the element class described by one path element.

    *name* is either a bare element name or ``name[@a='1' and @b='2']``;
    in the second form the quoted values are passed to the class as
    keyword arguments.  Raises ValidationError when the path element is
    malformed or no module defines the element.
    """
    if "[" not in name:
        ename, predicate = name, ""
    else:
        mo = _NODE_PATH_RE.match(name)
        if mo is None:
            raise ValidationError("malformed path element: %r" % (name,))
        ename, predicate = mo.groups()
    nc = _find_element(ename, modules)
    if nc is None:
        raise ValidationError("no such element name in modules")
    # Each quoted test is picked out as a unit, so a value may itself
    # contain "and" or "=" without being split.
    attdict = {}
    for attname, _quote, attval in _ATTR_PREDICATE_RE.findall(predicate):
        attdict[str(attname)] = attval
    return nc(**attdict)


def make_node(path, modules, value=None):
    """make_node(path, modules, [value])
    Makes a node or an XML fragment given a path, element module list, and an
    optional value.  One node is created per path element, each appended to
    the previous one; the last node is set inline and, when *value* is not
    None, receives it as text.  Returns the first (root) node.
    """
    if not isinstance(modules, (list, tuple)):
        modules = [modules]
    pathelements = path.split("/")
    if not pathelements[0]:  # delete possible empty root node
        del pathelements[0]
    rootnode = current = _construct_node(pathelements[0], modules)
    for element in pathelements[1:]:
        new = _construct_node(element, modules)
        current.append(new)
        current = new
    current.set_inline()
    if value is not None:
        current.add_text(value)
    return rootnode
def unescape(s):
    """Return *s* with &lt;, &gt;, &quot; and &amp; turned back into characters.

    A string without "&" is returned as is.  &apos; is not handled.
    """
    if '&' not in s:
        return s
    replacements = (
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", '"'),
        ("&amp;", "&"),  # Must be last
    )
    for entity, char in replacements:
        s = s.replace(entity, char)
    return s
def escape(s):
    """Return *s* with &, <, > and the double quote replaced by entities.

    The single quote is left alone.
    """
    replacements = (
        ("&", "&amp;"),  # Must be first
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&quot;"),
    )
    for char, entity in replacements:
        s = s.replace(char, entity)
    return s
# self test
if __name__ == "__main__":
    import os
    FILE = os.path.join(os.environ["PAF_HOME"], "etc", "dtd", "WCSinvalidation.dtd")
    outfilename = get_mod_file(FILE)
    argc = len(sys.argv)
    # outfile = open(outfilename, "w")

    # note: running this script as __main__ will not generate valid source code.
    # Use the dtd2py script for that.
    dtdp = get_dtd_compiler(sys.stdout)
    dtdp.parse_resource(FILE)
    # outfile.close()
    print(Comment("some ------- comment-"))
    print("+++++++")
    import dtds.pvsystem as pvs
    n = make_node("/pvsystem[@major='2' and @dot='0' and @minor='0']/pvac/httpOwsPort", pvs, 8080)
    print(n)
    # The remaining samples are each preceded by a separator line; the last
    # one has no text value.
    samples = (
        ('/pvsystem[@major="2" and @minor="0" and @dot="0"]/globals/enableMonitor', "true"),
        ('globals/enableMonitor', "true"),
        ('enableMonitor', "true"),
        ('enableMonitor', None),
    )
    for sample_path, sample_value in samples:
        print("+++++++")
        print(make_node(sample_path, pvs, sample_value))