001 package org.codehaus.groovy.sandbox.util; 002 import groovy.lang.Closure; 003 import groovy.lang.GroovyObject; 004 import groovy.lang.GroovyObjectSupport; 005 import groovy.lang.Writable; 006 007 import java.io.File; 008 import java.io.FileInputStream; 009 import java.io.IOException; 010 import java.io.InputStream; 011 import java.io.Reader; 012 import java.io.StringReader; 013 import java.io.Writer; 014 import java.security.AccessController; 015 import java.security.PrivilegedActionException; 016 import java.security.PrivilegedExceptionAction; 017 import java.util.HashMap; 018 import java.util.Iterator; 019 import java.util.LinkedList; 020 import java.util.List; 021 import java.util.Map; 022 023 import javax.xml.parsers.ParserConfigurationException; 024 import javax.xml.parsers.SAXParser; 025 import javax.xml.parsers.SAXParserFactory; 026 027 import org.codehaus.groovy.sandbox.markup.Buildable; 028 import org.xml.sax.Attributes; 029 import org.xml.sax.InputSource; 030 import org.xml.sax.SAXException; 031 import org.xml.sax.XMLReader; 032 import org.xml.sax.helpers.DefaultHandler; 033 034 035 public class XmlSlurper extends DefaultHandler { 036 private final XMLReader reader; 037 private List result = null; 038 private List body = null; 039 private final StringBuffer charBuffer = new StringBuffer(); 040 041 public XmlSlurper() throws ParserConfigurationException, SAXException { 042 this(false, true); 043 } 044 045 public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException { 046 SAXParserFactory factory = null; 047 048 try { 049 factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() { 050 public Object run() throws ParserConfigurationException { 051 return SAXParserFactory.newInstance(); 052 } 053 }); 054 } catch (final PrivilegedActionException pae) { 055 final Exception e = pae.getException(); 056 057 if (e instanceof ParserConfigurationException) { 058 throw (ParserConfigurationException) e; 059 } else { 060 throw new RuntimeException(e); 061 } 062 } 063 factory.setNamespaceAware(namespaceAware); 064 factory.setValidating(validating); 065 066 final SAXParser parser = factory.newSAXParser(); 067 this.reader = parser.getXMLReader(); 068 } 069 070 public XmlSlurper(final XMLReader reader) { 071 this.reader = reader; 072 } 073 074 public XmlSlurper(final SAXParser parser) throws SAXException { 075 this(parser.getXMLReader()); 076 } 077 078 /** 079 * Parse the content of the specified input source into a List 080 */ 081 public XmlList parse(final InputSource input) throws IOException, SAXException { 082 this.reader.setContentHandler(this); 083 this.reader.parse(input); 084 085 return (XmlList)this.result.get(0); 086 } 087 088 /** 089 * Parses the content of the given file as XML turning it into a List 090 */ 091 public XmlList parse(final File file) throws IOException, SAXException { 092 final InputSource input = new InputSource(new FileInputStream(file)); 093 094 input.setSystemId("file://" + file.getAbsolutePath()); 095 096 return parse(input); 097 098 } 099 100 /** 101 * Parse the content of the specified input stream into a List. 102 * Note that using this method will not provide the parser with any URI 103 * for which to find DTDs etc 104 */ 105 public XmlList parse(final InputStream input) throws IOException, SAXException { 106 return parse(new InputSource(input)); 107 } 108 109 /** 110 * Parse the content of the specified reader into a List. 111 * Note that using this method will not provide the parser with any URI 112 * for which to find DTDs etc 113 */ 114 public XmlList parse(final Reader in) throws IOException, SAXException { 115 return parse(new InputSource(in)); 116 } 117 118 /** 119 * Parse the content of the specified URI into a List 120 */ 121 public XmlList parse(final String uri) throws IOException, SAXException { 122 return parse(new InputSource(uri)); 123 } 124 125 /** 126 * A helper method to parse the given text as XML 127 * 128 * @param text 129 * @return 130 */ 131 public XmlList parseText(final String text) throws IOException, SAXException { 132 return parse(new StringReader(text)); 133 } 134 135 136 // ContentHandler interface 137 //------------------------------------------------------------------------- 138 139 /* (non-Javadoc) 140 * @see org.xml.sax.ContentHandler#startDocument() 141 */ 142 public void startDocument() throws SAXException { 143 this.result = null; 144 this.body = new LinkedList(); 145 this.charBuffer.setLength(0); 146 } 147 148 /* (non-Javadoc) 149 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) 150 */ 151 public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException { 152 addNonWhitespaceCdata(); 153 154 final Map attributes = new HashMap(); 155 156 for (int i = atts.getLength() - 1; i != -1; i--) { 157 if (atts.getURI(i).length() == 0) { 158 attributes.put(atts.getQName(i), atts.getValue(i)); 159 } else { 160 // 161 // Note this is strictly incorrect the name is really localname + URI 162 // We need to figure out what to do with paramenters in namespaces 163 // 164 attributes.put(atts.getLocalName(i), atts.getValue(i)); 165 } 166 167 } 168 169 final List newBody = new LinkedList(); 170 171 newBody.add(attributes); 172 173 newBody.add(this.body); 174 175 this.body = newBody; 176 } 177 178 /* (non-Javadoc) 179 * @see org.xml.sax.ContentHandler#characters(char[], int, int) 180 */ 181 public void characters(final char[] ch, final int start, final int length) throws SAXException { 182 this.charBuffer.append(ch, start, length); 183 } 184 185 /* (non-Javadoc) 186 * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String) 187 */ 188 public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException { 189 addNonWhitespaceCdata(); 190 191 final List children = this.body; 192 193 final Map attributes = (Map)this.body.remove(0); 194 195 this.body = (List)this.body.remove(0); 196 197 if (namespaceURI.length() == 0) { 198 this.body.add(new XmlList(qName, attributes, children, namespaceURI)); 199 } else { 200 this.body.add(new XmlList(localName, attributes, children, namespaceURI)); 201 } 202 } 203 204 /* (non-Javadoc) 205 * @see org.xml.sax.ContentHandler#endDocument() 206 */ 207 public void endDocument() throws SAXException { 208 this.result = this.body; 209 this.body = null; 210 } 211 212 // Implementation methods 213 //------------------------------------------------------------------------- 214 215 /** 216 * 217 */ 218 private void addNonWhitespaceCdata() { 219 if (this.charBuffer.length() != 0) { 220 // 221 // This element is preceeded by CDATA if it's not whitespace add it to the body 222 // Note that, according to the XML spec, we should preserve the CDATA if it's all whitespace 223 // but for the sort of work I'm doing ignoring the whitespace is preferable 224 // 225 final String cdata = this.charBuffer.toString(); 226 227 this.charBuffer.setLength(0); 228 if (cdata.trim().length() != 0) { 229 this.body.add(cdata); 230 } 231 } 232 } 233 } 234 235 class XmlList extends GroovyObjectSupport implements Writable, Buildable { 236 final String name; 237 final Map attributes; 238 final Object[] children; 239 final String namespaceURI; 240 241 public XmlList(final String name, final Map attributes, final List body, final String namespaceURI) { 242 super(); 243 244 this.name = name; 245 this.attributes = attributes; 246 this.children = body.toArray(); 247 this.namespaceURI = namespaceURI; 248 } 249 250 public Object getProperty(final String elementName) { 251 if (elementName.startsWith("@")) { 252 return this.attributes.get(elementName.substring(1)); 253 } else { 254 final int indexOfFirst = getNextXmlElement(elementName, -1); 255 256 if (indexOfFirst == -1) { // no elements match the element name 257 return new ElementCollection() { 258 protected ElementCollection getResult(final String property) { 259 return this; 260 } 261 262 /** 263 * 264 * Used by the Invoker when it wants to iterate over this object 265 * 266 * @return 267 */ 268 public ElementIterator iterator() { 269 return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{-1}) { 270 { 271 findNextChild(); // set up the element indexes 272 } 273 274 protected void findNextChild() { 275 this.nextParentElements[0] = -1; 276 } 277 }; 278 } 279 }; 280 } 281 282 if (getNextXmlElement(elementName, indexOfFirst) == -1) { // one element matches the element name 283 return this.children[indexOfFirst]; 284 } else { // > 1 element matches the element name 285 return new ElementCollection() { 286 protected ElementCollection getResult(final String property) { 287 return new ComplexElementCollection(new XmlList[]{XmlList.this}, 288 new int[] {indexOfFirst}, 289 new String[] {elementName}, 290 property); 291 } 292 293 /** 294 * 295 * Used by the Invoker when it wants to iterate over this object 296 * 297 * @return 298 */ 299 public ElementIterator iterator() { 300 return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{indexOfFirst}) { 301 protected void findNextChild() { 302 this.nextParentElements[0] = XmlList.this.getNextXmlElement(elementName, this.nextParentElements[0]); 303 } 304 }; 305 } 306 }; 307 } 308 } 309 } 310 311 public Object getAt(final int index) { 312 if (index == 0) { 313 return this; 314 } else { 315 throw new ArrayIndexOutOfBoundsException(index); 316 } 317 } 318 319 public int size() { 320 return 1; 321 } 322 323 public Object invokeMethod(final String name, final Object args) { 324 if ("attributes".equals(name)) { 325 return this.attributes; 326 } else if ("name".equals(name)) { 327 return this.name; 328 } else if ("children".equals(name)) { 329 return this.children; 330 } else if ("contents".equals(name)) { 331 return new Buildable() { 332 public void build(GroovyObject builder) { 333 buildChildren(builder); 334 } 335 }; 336 } else if ("text".equals(name)) { 337 return text(); 338 } else if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) { 339 return getProperty((String)((Object[])args)[0]); 340 } else if ("depthFirst".equals(name)) { 341 // 342 // TODO: replace this with an iterator 343 // 344 345 return new GroovyObjectSupport() { 346 public Object invokeMethod(final String name, final Object args) { 347 if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) { 348 return getProperty((String)((Object[])args)[0]); 349 } else { 350 return XmlList.this.invokeMethod(name, args); 351 } 352 } 353 354 public Object getProperty(final String property) { 355 if (property.startsWith("@")) { 356 return XmlList.this.getProperty(property); 357 } else { 358 final List result = new LinkedList(); 359 360 depthFirstGetProperty(property, XmlList.this.children, result); 361 362 return result; 363 } 364 } 365 366 private void depthFirstGetProperty(final String property, final Object[] contents, final List result) { 367 for (int i = 0; i != contents.length; i++) { 368 final Object item = contents[i]; 369 370 if (item instanceof XmlList) { 371 if (((XmlList)item).name.equals(property)) { 372 result.add(item); 373 } 374 375 depthFirstGetProperty(property, ((XmlList)item).children, result); 376 } 377 } 378 } 379 }; 380 } else { 381 return getMetaClass().invokeMethod(this, name, args); 382 } 383 } 384 385 /* (non-Javadoc) 386 * @see groovy.lang.Writable#writeTo(java.io.Writer) 387 */ 388 public Writer writeTo(Writer out) throws IOException { 389 390 for (int i = 0; i != this.children.length; i++) { 391 final Object child = this.children[i]; 392 393 if (child instanceof String) { 394 out.write((String)child); 395 } else { 396 ((XmlList)child).writeTo(out); 397 } 398 } 399 400 return out; 401 } 402 403 /* (non-Javadoc) 404 * @see org.codehaus.groovy.sandbox.markup.Buildable#build(groovy.lang.GroovyObject) 405 */ 406 public void build(final GroovyObject builder) { 407 // TODO handle Namespaces 408 final Closure rest = new Closure(null) { 409 public Object doCall(final Object o) { 410 buildChildren(builder); 411 412 return null; 413 } 414 }; 415 416 builder.invokeMethod(this.name, new Object[]{this.attributes, rest}); 417 418 } 419 420 public String toString() { 421 return text(); 422 } 423 424 private String text() { 425 final StringBuffer buff = new StringBuffer(); 426 427 for (int i = 0; i != this.children.length; i++) { 428 final Object child = this.children[i]; 429 430 if (child instanceof String) { 431 buff.append(child); 432 } else { 433 buff.append(((XmlList)child).text()); 434 } 435 } 436 437 return buff.toString(); 438 } 439 440 private void buildChildren(final GroovyObject builder) { 441 for (int i = 0; i != this.children.length; i++) { 442 if (this.children[i] instanceof Buildable) { 443 ((Buildable)this.children[i]).build(builder); 444 } else { 445 builder.getProperty("mkp"); 446 builder.invokeMethod("yield", new Object[]{this.children[i]}); 447 } 448 } 449 } 450 451 protected int getNextXmlElement(final String name, final int lastFound) { 452 for (int i = lastFound + 1; i < this.children.length; i++) { 453 final Object item = this.children[i]; 454 455 if (item instanceof XmlList && ((XmlList)item).name.equals(name)) { 456 return i; 457 } 458 } 459 460 return -1; 461 } 462 } 463 464 abstract class ElementIterator implements Iterator { 465 protected final XmlList[] parents; 466 protected final int[] nextParentElements; 467 468 protected ElementIterator(final XmlList[] parents, int[] nextParentElements) { 469 this.parents = new XmlList[parents.length]; 470 System.arraycopy(parents, 0, this.parents, 0, parents.length); 471 472 this.nextParentElements = new int[nextParentElements.length]; 473 System.arraycopy(nextParentElements, 0, this.nextParentElements, 0, nextParentElements.length); 474 } 475 476 /* (non-Javadoc) 477 * @see java.util.Iterator#hasNext() 478 */ 479 public boolean hasNext() { 480 return this.nextParentElements[0] != -1; 481 } 482 483 /* (non-Javadoc) 484 * @see java.util.Iterator#next() 485 */ 486 public Object next() { 487 final Object result = this.parents[0].children[this.nextParentElements[0]]; 488 489 findNextChild(); 490 491 return result; 492 } 493 494 /* (non-Javadoc) 495 * @see java.util.Iterator#remove() 496 */ 497 public void remove() { 498 throw new UnsupportedOperationException(); 499 } 500 501 protected abstract void findNextChild(); 502 } 503 504 abstract class ElementCollection extends GroovyObjectSupport { 505 private int count = -1; 506 507 public abstract ElementIterator iterator(); 508 509 /* (non-Javadoc) 510 * @see groovy.lang.GroovyObject#getProperty(java.lang.String) 511 */ 512 public Object getProperty(final String property) { 513 final ElementCollection result = getResult(property); 514 final Iterator iterator = result.iterator(); 515 516 if (iterator.hasNext()) { 517 // 518 // See if there's only one available 519 // 520 final Object first = iterator.next(); 521 522 if (!iterator.hasNext()) { 523 return first; 524 } 525 } 526 527 return result; 528 } 529 530 protected abstract ElementCollection getResult(String property); 531 532 public synchronized Object getAt(int index) { 533 if (index >= 0) { 534 final Iterator iter = iterator(); 535 536 while (iter.hasNext()) { 537 if (index-- == 0) { 538 return iter.next(); 539 } else { 540 iter.next(); 541 } 542 } 543 } 544 545 throw new ArrayIndexOutOfBoundsException(index); 546 } 547 548 public synchronized int size() { 549 if (this.count == -1) { 550 final Iterator iter = iterator(); 551 552 this.count = 0; 553 554 while (iter.hasNext()) { 555 this.count++; 556 iter.next(); 557 } 558 } 559 return this.count; 560 } 561 } 562 563 class ComplexElementCollection extends ElementCollection { 564 private final XmlList[] parents; 565 private final int[] nextParentElements; 566 private final String[] parentElementNames; 567 568 public ComplexElementCollection(final XmlList[] parents, 569 final int[] nextParentElements, 570 final String[] parentElementNames, 571 final String childElementName) 572 { 573 this.parents = new XmlList[parents.length + 1]; 574 this.parents[0] = (XmlList)parents[0].children[nextParentElements[0]]; 575 System.arraycopy(parents, 0, this.parents, 1, parents.length); 576 577 this.nextParentElements = new int[nextParentElements.length + 1]; 578 this.nextParentElements[0] = -1; 579 System.arraycopy(nextParentElements, 0, this.nextParentElements, 1, nextParentElements.length); 580 581 this.parentElementNames = new String[parentElementNames.length + 1]; 582 this.parentElementNames[0] = childElementName; 583 System.arraycopy(parentElementNames, 0, this.parentElementNames, 1, parentElementNames.length); 584 585 // 586 // Use the iterator to get the index of the first element 587 // 588 589 final ElementIterator iter = this.iterator(); 590 591 iter.findNextChild(); 592 593 this.nextParentElements[0] = iter.nextParentElements[0]; 594 } 595 596 protected ElementCollection getResult(final String property) { 597 return new ComplexElementCollection(this.parents, 598 this.nextParentElements, 599 this.parentElementNames, 600 property); 601 } 602 603 /** 604 * 605 * Used by the Invoker when it wants to iterate over this object 606 * 607 * @return 608 */ 609 public ElementIterator iterator() { 610 return new ElementIterator(this.parents, this.nextParentElements) { 611 protected void findNextChild() { 612 this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], this.nextParentElements[0]); 613 614 while (this.nextParentElements[0] == -1) { 615 this.parents[0] = findNextParent(1); 616 617 if (this.parents[0] == null) { 618 return; 619 } else { 620 this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], -1); 621 } 622 } 623 } 624 625 private XmlList findNextParent(final int i) { 626 if (i == this.nextParentElements.length) return null; 627 628 this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], this.nextParentElements[i]); 629 630 while (this.nextParentElements[i] == -1) { 631 this.parents[i] = findNextParent(i + 1); 632 633 if (this.parents[i] == null) { 634 return null; 635 } else { 636 this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], -1); 637 } 638 } 639 640 return (XmlList)this.parents[i].children[this.nextParentElements[i]]; 641 } 642 }; 643 } 644 }