0001"""Kid Parser
0002
0003Parses Kid embedded XML to Python source code.
0004"""
0005
0006from __future__ import generators
0007
0008__revision__ = "$Rev: 261 $"
0009__date__ = "$Date: 2006-01-22 00:48:22 -0500 (Sun, 22 Jan 2006) $"
0010__author__ = "Ryan Tomayko (rtomayko@gmail.com)"
0011__copyright__ = "Copyright 2004-2005, Ryan Tomayko"
0012__license__ = "MIT <http://www.opensource.org/licenses/mit-license.php>"
0013
0014import re
0015from kid.pull import *
0016from kid.et import namespaces
0017from kid import Namespace
0018
0019
# XML namespace URI of Kid's template attributes, and the Namespace helper
# used below to build the attribute names looked up by KidParser.
KID_XMLNS = "http://purl.org/kid/ns#"
# NOTE(review): presumably the conventional namespace prefix; it is not
# referenced by the code visible in this module.
KID_PREFIX = 'py'
kidns = Namespace(KID_XMLNS)
# Attribute names in the Kid namespace. KidParser looks each of these up in
# an element's attributes and removes it once it has been handled.
QNAME_FOR = kidns['for']
QNAME_IF = kidns['if']
QNAME_DEF = kidns['def']
QNAME_SLOT = kidns['slot']
QNAME_CONTENT = kidns['content']
QNAME_REPLACE = kidns['replace']
QNAME_MATCH = kidns['match']
QNAME_STRIP = kidns['strip']
QNAME_ATTRIBUTES = kidns['attrs']
QNAME_EXTENDS = kidns['extends']
QNAME_LAYOUT = kidns['layout']


# QNAME_OMIT is only consulted by KidParser.strip_proc as a fallback when the
# "strip" attribute is absent. QNAME_REPEAT is not referenced by the code
# visible in this module.
QNAME_OMIT = kidns['omit']
QNAME_REPEAT = kidns['repeat']


# Processing-instruction targets whose data KidParser.proc_stream inserts
# into the generated module as Python code; any other processing instruction
# is re-emitted in the template output.
KID_PI = 'python'
KID_ALT_PI = 'py'
KID_OLD_PI = 'kid'
0043
def parse(source, encoding=None, filename=None):
    """Return the Python source generated for the Kid template in `source`.

    source   -- passed unchanged to document() to build the event stream.
    encoding -- forwarded to both document() and KidParser.
    filename -- forwarded to document().

    """
    events = document(source, encoding=encoding, filename=filename)
    return KidParser(events, encoding).parse()
0047
def parse_file(filename, encoding=None):
    """Parse the named template file and return the generated Python source.

    filename -- the name of the file; it is opened in binary mode and is
                always closed again, even when parsing raises.
    encoding -- forwarded to parse().

    """
    fp = open(filename, 'rb')
    try:
        generated = parse(fp, encoding, filename=filename)
    finally:
        fp.close()
    return generated
0061
class KidParser(object):
    """Turn a stream of template events into Python module source code.

    The output is collected in several CodeGenerator buffers which parse()
    concatenates in this order:

    module_code     -- module level prologue (imports, defaults, helpers)
    module_defs     -- module level wrappers for named template functions
    class_code      -- the ``class Template(...)`` header
    expand_code     -- ``initialize`` and ``_pull`` methods of the class
    inst_defs       -- further methods of the class (named functions,
                       match templates, layout ``_pull`` override)
    end_module_code -- module level epilogue

    Element attributes in the Kid namespace are handled by a chain of
    ``*_proc`` methods (def/slot -> match -> for -> if -> replace ->
    content/strip -> attributes); each one removes the attribute it handles
    from the attribute dictionary before delegating to the next.
    """

    def __init__(self, stream, encoding=None):
        """Set up empty code buffers for the given event stream.

        stream   -- iterable of (event, item) pairs; it must also provide
                    the eat() method used by strip_proc().
        encoding -- default output encoding written into the generated
                    module; 'utf-8' when not given.
        """
        self.stream = stream
        self.encoding = encoding or 'utf-8'
        # Nesting depth of the element whose children are being processed.
        self.depth = 0
        self.module_code = CodeGenerator()
        self.class_code = CodeGenerator()
        # Starts at level 1 because its contents live inside the class body.
        self.expand_code = CodeGenerator(level=1)
        self.end_module_code = CodeGenerator()
        self.module_defs = []
        self.inst_defs = []

    def parse(self):
        """Process the whole stream and return the generated source text."""
        self.begin()
        self.proc_stream(self.module_code)
        self.end()
        # The order of concatenation defines the layout of the module.
        parts = []
        parts += self.module_code.code
        for c in self.module_defs:
            parts += c.code
        parts += self.class_code.code
        parts += self.expand_code.code
        for c in self.inst_defs:
            parts += c.code
        parts += self.end_module_code.code
        return '\n'.join(parts)

    def begin(self):
        """Emit the fixed prologue of the generated module and class."""
        code = self.module_code
        code.line('from __future__ import generators')
        code.line('import kid')
        code.line('from kid.template_util import *')
        code.line('import kid.template_util as template_util')

        # Module level defaults; later lines appended to module_code (XML
        # declaration, doctype, embedded code) may rebind these names.
        code.line('encoding = "%s"' % self.encoding)
        code.line('doctype = None')
        code.line('omit_namespaces = [kid.KID_XMLNS]')
        code.line('layout_params = {}')

        # Module level convenience functions that delegate to Template.
        code.line('def pull(**kw): return Template(**kw).pull()')
        code.line("def generate(encoding=encoding, fragment=0, output=None, **kw): "
                  "return Template(**kw).generate(encoding=encoding, fragment=fragment, output=output)")
        code.line("def serialize(encoding=encoding, fragment=0, output=None, **kw): "
                  "return Template(**kw).serialize(encoding=encoding, fragment=fragment, output=output)")
        code.line("def write(file, encoding=encoding, fragment=0, output=None, **kw): "
                  "return Template(**kw).write(file, encoding=encoding, fragment=fragment, output=output)")
        code.line('BaseTemplate = kid.BaseTemplate')
        code.line('def initialize(template): pass')

        # Methods of the Template class; the _pull block stays open and is
        # closed by end() once the stream has been processed.
        code = self.expand_code
        code.start_block('def initialize(self):')
        code.line('rslt = initialize(self)')
        code.line('if rslt != 0: super(Template, self).initialize()')
        code.end_block()
        code.start_block('def _pull(self):')

        code.line("exec template_util.get_locals(self, locals())")
        code.line('current, ancestors = None, []')
        code.line('if doctype: yield (DOCTYPE, doctype)')

        code = self.end_module_code
        code.line('')

    def end(self):
        """Close the ``_pull`` block opened by begin()."""
        self.expand_code.end_block()

    def proc_stream(self, code):
        """Consume events up to the end of the current element.

        code -- the CodeGenerator receiving the generated statements. When
                it is module_code, the first element met is treated as the
                template root: the class header is emitted and the element
                itself is generated into expand_code.

        The loop stops at the first END event that does not belong to a
        comment or processing instruction.
        """
        for (ev, item) in self.stream:
            if ev == START:
                if item.tag == Comment:
                    text = item.text.lstrip()
                    # Comments starting with '!' are dropped from the output.
                    if text.startswith('!'):
                        continue
                    line = code.line
                    # Only comments starting with '<' or '[' are interpolated.
                    if text.startswith('<') or text.startswith('['):
                        sub = interpolate(item.text)
                        if isinstance(sub, list):
                            text = "''.join(%r)" % sub
                        else:
                            text = repr(sub)
                    else:
                        text = repr(item.text)
                    line('_e = Comment(%s)' % text)
                    line('yield (START, _e); yield (END, _e); del _e')
                elif item.tag == ProcessingInstruction:
                    # The text is "<target> <data>"; data may be missing.
                    if ' ' in item.text.strip():
                        (name, data) = item.text.split(' ', 1)
                    else:
                        (name, data) = (item.text, '')
                    if name in (KID_PI, KID_ALT_PI, KID_OLD_PI):
                        # Embedded Python: copy it into the generated code.
                        if data:
                            code.insert_block(data)
                    else:
                        # Foreign instruction: re-emit it in the output. At
                        # depth 0 it goes to expand_code instead of `code`.
                        if self.depth:
                            target = code
                        else:
                            target = self.expand_code
                        target.line('_e = ProcessingInstruction(%r, %r)' % (name, data) )
                        target.line('yield (START, _e); yield (END, _e); del _e')
                        del target
                else:
                    layout = None
                    if code is self.module_code:
                        # Root element: emit the class header, listing the
                        # py:extends bases before BaseTemplate.
                        layout = item.get(QNAME_LAYOUT)
                        if layout is not None:
                            del item.attrib[QNAME_LAYOUT]
                        decl = ['class Template(']
                        extends = item.get(QNAME_EXTENDS)
                        parts = []
                        if extends is not None:
                            del item.attrib[QNAME_EXTENDS]
                            for c in extends.split(','):
                                parts.append('template_util.get_base_class(%s, __file__)' % c)
                        parts.append('BaseTemplate')
                        decl.append(','.join(parts))
                        decl.append('):')
                        code = self.class_code
                        code.start_block(''.join(decl))
                        code.line('_match_templates = []')
                        # From here on the body is generated into _pull().
                        code = self.expand_code
                        del decl, parts
                    self.def_proc(item, item.attrib, code)
                    if layout is not None:
                        # py:layout: add a second _pull() to the class that
                        # instantiates the layout template and delegates to
                        # it. It is appended to inst_defs, which parse()
                        # places after expand_code in the class body.
                        old_code = code
                        code = CodeGenerator(level=1)
                        code.start_block("def _pull(self):")
                        code.line('kw = dict(layout_params)')
                        code.line('kw.update(self.__dict__)')

                        # These two instance attributes are not forwarded to
                        # the layout template.
                        code.line('kw.pop("assume_encoding", None)')
                        code.line('kw.pop("_layout_classes", None)')
                        code.line('temp = template_util.get_base_class(%s, __file__)(**kw)' % layout)
                        code.line('temp._match_templates = self._match_templates + temp._match_templates')
                        code.line('return temp._pull()')
                        code.end_block()
                        self.inst_defs.append(code)
                        code = old_code
            elif ev == END and item.tag not in (ProcessingInstruction, Comment):
                break
            elif ev == TEXT:
                self.text_interpolate(item, code)
            elif ev == XML_DECL and item[1] is not None:
                self.module_code.line('encoding = %r' % item[1])
            elif ev == DOCTYPE:
                self.module_code.line('doctype = (%r, %r, %r)' % item)

    def def_proc(self, item, attrib, code):
        """Handle py:def / py:slot, then continue with match_proc().

        For a declaration ``name(args)`` (the parentheses are optional) a
        module level function ``name`` and a Template method ``name`` are
        generated; the element's code becomes the body of the method. For
        py:slot a call to the method is additionally emitted in place.
        """
        attr_name = QNAME_DEF
        decl = attrib.get(attr_name)
        if decl is None:
            attr_name = QNAME_SLOT
            decl = attrib.get(attr_name)
        if decl is not None:
            del attrib[attr_name]
            old_code = code
            if '(' not in decl:
                decl = decl + '()'
            name, args = decl.split('(', 1)
            # Drop the closing parenthesis and anything after it.
            pos = args.rfind(')')
            args = args[0:pos].strip()
            if args:
                self_ = 'self, '
            else:
                self_ = 'self'
            class_decl = '%s(%s%s)' % (name, self_, args)

            # Module level function delegating to a fresh Template.
            code = CodeGenerator()
            code.start_block('def %s(*args, **kw):' % name)
            code.line('return Template().%s(*args, **kw)' % name)
            code.end_block()
            code.line('layout_params["%s"] = %s' % (name, name))
            self.module_defs.append(code)

            # Method of the Template class holding the element's code.
            code = CodeGenerator(level=1)
            code.start_block('def %s:' % class_decl)
            code.line('exec template_util.get_locals(self, locals())')
            code.line('current, ancestors = None, []')
            self.inst_defs.append(code)
            self.match_proc(item, attrib, code)
            code.end_block()
            if attr_name == QNAME_SLOT:
                old_code.line('for _e in template_util.generate_content(self.%s()): yield _e' % name)
        else:
            self.match_proc(item, attrib, code)

    def match_proc(self, item, attrib, code):
        """Handle py:match, then continue with for_proc().

        With py:match the element's code is generated into a separate
        ``_match_func`` method which is registered, together with a lambda
        evaluating the match expression, in ``_match_templates``.
        """
        expr = attrib.get(QNAME_MATCH)
        if expr is not None:
            del attrib[QNAME_MATCH]
            code = CodeGenerator(level=1)
            code.start_block('def _match_func(self, item, apply):')
            code.line('exec template_util.get_locals(self, locals())')
            code.line('current, ancestors = None, []')
            self.for_proc(item, attrib, code)
            code.end_block()
            code.line('_match_templates.append((lambda item: %s, _match_func))' % expr)
            self.inst_defs.append(code)
        else:
            self.for_proc(item, attrib, code)

    def for_proc(self, item, attrib, code):
        """Wrap the element in a ``for`` block when py:for is present."""
        expr = attrib.get(QNAME_FOR)
        if expr is not None:
            code.start_block('for %s:' % expr)
            del attrib[QNAME_FOR]
            self.if_proc(item, attrib, code)
            code.end_block()
        else:
            self.if_proc(item, attrib, code)

    def if_proc(self, item, attrib, code):
        """Wrap the element in an ``if`` block when py:if is present."""
        expr = attrib.get(QNAME_IF)
        if expr is not None:
            code.start_block('if %s:' % expr)
            del attrib[QNAME_IF]
            self.replace_proc(item, attrib, code)
            code.end_block()
        else:
            self.replace_proc(item, attrib, code)

    def replace_proc(self, item, attrib, code):
        """Rewrite py:replace as an unconditional py:strip plus py:content."""
        expr = attrib.get(QNAME_REPLACE)
        if expr is not None:
            del attrib[QNAME_REPLACE]
            attrib[QNAME_STRIP] = ""
            attrib[QNAME_CONTENT] = expr
        self.strip_proc(item, attrib, code)

    def strip_proc(self, item, attrib, code):
        """Generate start tag, content and end tag, honouring py:strip.

        The strip expression (py:strip, or py:omit when py:strip is absent)
        has three cases: missing -- tags are always emitted; empty string --
        tags are never emitted; anything else -- tags are emitted at run
        time only when the expression is false.
        """
        has_content = self.content_proc(item, attrib, code)
        expr, attr = (attrib.get(QNAME_STRIP), QNAME_STRIP)
        if expr is None:
            expr, attr = (attrib.get(QNAME_OMIT), QNAME_OMIT)
        start_block, end_block = (code.start_block, code.end_block)
        line = code.line
        if expr is not None:
            del attrib[attr]
            if expr != '':
                start_block("if not (%s):" % expr)
                self.attrib_proc(item, attrib, code)
                end_block()
            else:
                # Empty expression: the start tag is never generated.
                pass
        else:
            self.attrib_proc(item, attrib, code)
        if has_content:
            code.start_block(
                'for _e in template_util.generate_content(_cont, current):')
            line('yield _e')
            line('del _e')
            code.end_block()
            # The template's own children are replaced by the py:content
            # value. NOTE(review): eat() presumably discards the remaining
            # events of this element -- confirm in kid.pull.
            self.stream.eat()
        else:
            self.depth += 1
            self.proc_stream(code)
            self.depth -= 1
        # The end tag mirrors the three cases used for the start tag.
        if expr:
            start_block("if not (%s):" % expr)
            line('yield (END, current)')
            line('current = ancestors.pop(0)')
            end_block()
        elif expr != '':
            line('yield (END, current)')
            line('current = ancestors.pop(0)')

    def attrib_proc(self, item, attrib, code):
        """Generate creation of the element and its START event.

        Attribute values are passed through interpolate(); the attribute
        dictionary is embedded in the generated code via its repr. A
        py:attrs expression is merged in at run time through
        template_util.update_dict().
        """
        line = code.line
        need_interpolation = 0
        names = namespaces(item, remove=1)
        for (k, v) in attrib.items():
            sub = interpolate(v)
            # interpolate() may hand back the very same object when there
            # was nothing to substitute; only store real changes.
            if sub is not v:
                attrib[k] = sub
                if isinstance(sub, list):
                    need_interpolation = 1
        expr = attrib.get(QNAME_ATTRIBUTES)

        if expr is not None:
            del attrib[QNAME_ATTRIBUTES]
            # %r yields a valid string literal whatever the expression
            # contains (quotes, backslashes), consistent with the other
            # values embedded in the generated code.
            attr_text = 'template_util.update_dict(%r, %r, globals(), locals())' % (attrib, expr)
            attr_text = 'template_util.make_attrib(%s,self._get_assume_encoding())' % attr_text
        else:
            if attrib:
                if need_interpolation:
                    attr_text = 'template_util.make_attrib(%r,self._get_assume_encoding())' % attrib
                else:
                    attr_text = repr(attrib)
            else:
                attr_text = '{}'
        line('ancestors.insert(0, current)')
        line('current = Element(%r, %s)' % (item.tag, attr_text))
        if len(names):
            code.start_block('for _p, _u in %r.items():' % names)
            line('if not _u in omit_namespaces: yield (START_NS, (_p,_u))')
            code.end_block()
        line('yield (START, current)')

    def content_proc(self, item, attrib, code):
        """Handle py:content; return true if the attribute was present.

        The expression is assigned to ``_cont`` in the generated code.
        """
        expr = attrib.get(QNAME_CONTENT)
        if expr is not None:
            del attrib[QNAME_CONTENT]
            code.line('_cont = %s' % expr)
            return 1
        return 0

    def text_interpolate(self, text, code):
        """Generate code yielding a text node, expanding substitutions."""
        sub = interpolate(text)
        if isinstance(sub, list):
            code.start_block('for _e in %r:' % sub)
            code.line('for _e2 in template_util.generate_content(_e): yield _e2')
            code.end_block()
        else:
            code.line('yield (TEXT, %r)' % sub)
0385
class SubExpression(list):
    """List of Python source fragments whose repr is a list display.

    The items are joined verbatim (not repr'd), so the repr is itself
    source code for a list built from those fragments.
    """

    def __repr__(self):
        fragments = ', '.join(self)
        return "[%s]" % fragments
0389
# Long form substitution: "${expr}" with the expression text captured
# (non-greedy, up to the first "}"). A "$" directly preceded by another "$"
# does not match.
_sub_expr = re.compile(r"(?<!\$)\$\{(.+?)\}")
# Short form substitution: "$name", where name starts with a letter and may
# continue with letters, digits, underscores and dots. Again a "$" directly
# preceded by another "$" does not match.
_sub_expr_short = re.compile(r"(?<!\$)\$([a-zA-Z][a-zA-Z0-9_\.]*)")
0392
def interpolate(text):
    """Split `text` into literal pieces and substitution expressions.

    Returns the text itself (with "$$" collapsed to "$") when it contains
    neither "${expr}" nor "$name". Otherwise returns a SubExpression whose
    items are source fragments in document order: expressions verbatim,
    non-empty literal pieces as the repr of the piece with "$$" collapsed
    to "$".
    """
    pieces = _sub_expr.split(text)
    if len(pieces) > 1:
        # Long form found. Splitting on a pattern with one group alternates
        # literal, expression, literal, ...; the literal pieces may still
        # contain short form references, so split those once more. Each
        # split yields an odd number of items, which keeps the alternation.
        expanded = []
        is_expr = 0
        for piece in pieces:
            if is_expr:
                expanded.append(piece)
            else:
                expanded.extend(_sub_expr_short.split(piece))
            is_expr = not is_expr
        pieces = expanded
    else:
        pieces = _sub_expr_short.split(text)
        if len(pieces) == 1:
            # Nothing to substitute at all.
            return text.replace('$$', '$')
    result = SubExpression()
    is_expr = 0
    for piece in pieces:
        if is_expr:
            result.append(piece)
        elif piece:
            result.append(repr(piece.replace('$$', '$')))
        is_expr = not is_expr
    return result
0421
0422
class CodeGenerator:
    """A simple Python code generator.

    Lines are collected in the list `code`; each is prefixed with `tab`
    repeated `level` times, where `level` is the current block depth.
    """

    # Class level defaults; __init__ only shadows them when they differ.
    level = 0
    tab = '\t'

    def __init__(self, code=None, level=0, tab='\t'):
        """Create a generator.

        code  -- initial list of lines; a new list is used when this is
                 None or empty.
        level -- initial indentation level.
        tab   -- string used for one level of indentation.
        """
        self.code = code or []
        if level != self.level:
            self.level = level
        if tab != self.tab:
            self.tab = tab

    def line(self, text):
        """Append `text` as one line at the current indentation level."""
        self.code.append('%s%s' % (self.tab * self.level, text))

    def start_block(self, text):
        """Append `text` (a block header) and indent what follows."""
        self.line(text)
        self.level += 1

    def end_block(self, nblocks=1, with_pass=False):
        """Close `nblocks` blocks.

        With with_pass true, a ``pass`` line is written into each block
        before it is closed.
        """
        for n in range(nblocks):
            if with_pass:
                self.line('pass')
            self.level -= 1

    def insert_block(self, block):
        """Append a block of Python source text.

        A single line is stripped and appended as is; several lines have
        their indentation normalized by _adjust_python_block() first. An
        empty block appends nothing.
        """
        lines = block.splitlines()
        if not lines:
            # splitlines() of '' is empty; there is nothing to insert.
            return
        if len(lines) == 1:
            self.line(lines[0].strip())
        else:
            for line in _adjust_python_block(lines, self.tab):
                self.line(line)

    def __str__(self):
        """Return the lines joined by newlines, each newline terminated.

        The buffer itself is left untouched, so repeated calls return the
        same text.
        """
        return '\n'.join(self.code + [''])
0463
0464
0465
0466def _adjust_python_block(lines, tab='\t'):
0467 """Adjust the indentation of a Python block."""
0468 lines = [lines[0].strip()] + [line.rstrip() for line in lines[1:]]
0469 ind = None
0470 for line in lines[1:]:
0471 if line != '':
0472 s = line.lstrip()
0473 if s[0] != '#':
0474 i = len(line) - len(s)
0475 if ind is None or i < ind:
0476 ind = i
0477 if i == 0:
0478 break
0479 if ind is not None or ind != 0:
0480 lines[1:] = [line[:ind].lstrip() + line[ind:]
0481 for line in lines[1:]]
0482 if lines[0] and not lines[0][0] == '#':
0483
0484 try:
0485 compile(lines[0], '<string>', 'exec')
0486
0487 except SyntaxError:
0488 try:
0489 block = '\n'.join(lines) + '\n'
0490 compile(block, '<string>', 'exec')
0491
0492 except IndentationError:
0493
0494 lines2 = lines[:1] + [tab + line for line in lines[1:]]
0495 block = '\n'.join(lines2) + '\n'
0496
0497 compile(block, '<string>', 'exec')
0498 lines = lines2
0499 except:
0500 pass
0501 except:
0502 pass
0503 return lines
0504
0505
# Provide enumerate() on interpreters that lack the builtin.
try:
    enumerate
except NameError:
    def enumerate(seq):
        """Yield (index, element) pairs for the sequence `seq`."""
        positions = range(len(seq))
        for pair in zip(positions, seq):
            yield pair