001    package org.codehaus.groovy.sandbox.util;
002    import groovy.lang.Closure;
003    import groovy.lang.GroovyObject;
004    import groovy.lang.GroovyObjectSupport;
005    import groovy.lang.Writable;
006    
007    import java.io.File;
008    import java.io.FileInputStream;
009    import java.io.IOException;
010    import java.io.InputStream;
011    import java.io.Reader;
012    import java.io.StringReader;
013    import java.io.Writer;
014    import java.security.AccessController;
015    import java.security.PrivilegedActionException;
016    import java.security.PrivilegedExceptionAction;
017    import java.util.HashMap;
018    import java.util.Iterator;
019    import java.util.LinkedList;
020    import java.util.List;
021    import java.util.Map;
022    
023    import javax.xml.parsers.ParserConfigurationException;
024    import javax.xml.parsers.SAXParser;
025    import javax.xml.parsers.SAXParserFactory;
026    
027    import org.codehaus.groovy.sandbox.markup.Buildable;
028    import org.xml.sax.Attributes;
029    import org.xml.sax.InputSource;
030    import org.xml.sax.SAXException;
031    import org.xml.sax.XMLReader;
032    import org.xml.sax.helpers.DefaultHandler;
033    
034    
035    public class XmlSlurper extends DefaultHandler {
036        private final XMLReader reader;
037            private List result = null;
038            private List body = null;
039            private final StringBuffer charBuffer = new StringBuffer();
040    
041        public XmlSlurper() throws ParserConfigurationException, SAXException {
042            this(false, true);
043        }
044    
045        public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
046            SAXParserFactory factory = null;
047            
048                    try {
049                                    factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
050                                            public Object run() throws ParserConfigurationException {
051                                                    return SAXParserFactory.newInstance();
052                                            }
053                                    });
054                    } catch (final PrivilegedActionException pae) {
055                    final Exception e = pae.getException();
056                            
057                            if (e instanceof ParserConfigurationException) {
058                                    throw (ParserConfigurationException) e;
059                            } else {
060                                    throw new RuntimeException(e);
061                            }
062                    }
063            factory.setNamespaceAware(namespaceAware);
064            factory.setValidating(validating);
065    
066            final SAXParser parser = factory.newSAXParser();
067            this.reader = parser.getXMLReader();
068        }
069    
070        public XmlSlurper(final XMLReader reader) {
071            this.reader = reader;
072        }
073    
074        public XmlSlurper(final SAXParser parser) throws SAXException {
075            this(parser.getXMLReader());
076        }
077    
078        /**
079         * Parse the content of the specified input source into a List
080         */
081        public XmlList parse(final InputSource input) throws IOException, SAXException {
082                    this.reader.setContentHandler(this);
083                    this.reader.parse(input);
084            
085            return (XmlList)this.result.get(0);
086        }
087        
088        /**
089         * Parses the content of the given file as XML turning it into a List
090         */
091        public XmlList parse(final File file) throws IOException, SAXException {
092        final InputSource input = new InputSource(new FileInputStream(file));
093        
094            input.setSystemId("file://" + file.getAbsolutePath());
095            
096            return parse(input);
097    
098        }
099    
100        /**
101         * Parse the content of the specified input stream into a List.
102         * Note that using this method will not provide the parser with any URI
103         * for which to find DTDs etc
104         */
105        public XmlList parse(final InputStream input) throws IOException, SAXException {
106            return parse(new InputSource(input));
107        }
108    
109        /**
110         * Parse the content of the specified reader into a List.
111         * Note that using this method will not provide the parser with any URI
112         * for which to find DTDs etc
113         */
114        public XmlList parse(final Reader in) throws IOException, SAXException {
115            return parse(new InputSource(in));
116        }
117    
118        /**
119         * Parse the content of the specified URI into a List
120         */
121        public XmlList parse(final String uri) throws IOException, SAXException {
122            return parse(new InputSource(uri));
123        }
124    
125        /**
126         * A helper method to parse the given text as XML
127         * 
128         * @param text
129         * @return
130         */
131        public XmlList parseText(final String text) throws IOException, SAXException {
132            return parse(new StringReader(text));
133        }
134        
135    
136        // ContentHandler interface
137        //-------------------------------------------------------------------------                    
138            
139            /* (non-Javadoc)
140             * @see org.xml.sax.ContentHandler#startDocument()
141             */
142            public void startDocument() throws SAXException {
143                    this.result = null;
144                    this.body = new LinkedList();
145                    this.charBuffer.setLength(0);
146            }
147            
148            /* (non-Javadoc)
149             * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
150             */
151            public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
152                    addNonWhitespaceCdata();
153                    
154                    final Map attributes = new HashMap();
155                    
156                    for (int i = atts.getLength() - 1; i != -1; i--) {
157                            if (atts.getURI(i).length() == 0) {
158                                    attributes.put(atts.getQName(i), atts.getValue(i));
159                            } else {
160                                    //
161                                    // Note this is strictly incorrect the name is really localname + URI
162                                    // We need to figure out what to do with paramenters in namespaces
163                                    //
164                                    attributes.put(atts.getLocalName(i), atts.getValue(i));
165                            }
166                            
167                    }
168                    
169                    final List newBody = new LinkedList();
170    
171                    newBody.add(attributes);
172                    
173                    newBody.add(this.body);
174    
175                    this.body = newBody;
176            }
177    
178            /* (non-Javadoc)
179             * @see org.xml.sax.ContentHandler#characters(char[], int, int)
180             */
181            public void characters(final char[] ch, final int start, final int length) throws SAXException {
182                    this.charBuffer.append(ch, start, length);
183            }
184            
185            /* (non-Javadoc)
186             * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
187             */
188            public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
189                    addNonWhitespaceCdata();
190                    
191                    final List children = this.body;
192                    
193                    final Map attributes = (Map)this.body.remove(0);
194                    
195                    this.body = (List)this.body.remove(0);
196                    
197                    if (namespaceURI.length() == 0) {
198                            this.body.add(new XmlList(qName, attributes, children, namespaceURI));
199                    } else {
200                            this.body.add(new XmlList(localName, attributes, children, namespaceURI));
201                    }
202            }
203            
204            /* (non-Javadoc)
205             * @see org.xml.sax.ContentHandler#endDocument()
206             */
207            public void endDocument() throws SAXException {
208                    this.result = this.body;
209                    this.body = null;
210            }
211    
212        // Implementation methods
213        //-------------------------------------------------------------------------           
214    
215            /**
216             * 
217             */
218            private void addNonWhitespaceCdata() {
219                    if (this.charBuffer.length() != 0) {
220                            //
221                            // This element is preceeded by CDATA if it's not whitespace add it to the body
222                            // Note that, according to the XML spec, we should preserve the CDATA if it's all whitespace
223                            // but for the sort of work I'm doing ignoring the whitespace is preferable
224                            //
225                            final String cdata = this.charBuffer.toString();
226                            
227                            this.charBuffer.setLength(0);
228                            if (cdata.trim().length() != 0) {
229                                    this.body.add(cdata);
230                            }
231                    }               
232            }
233    }
234    
235    class XmlList extends GroovyObjectSupport implements Writable, Buildable {
236            final String name;
237            final Map attributes;
238            final Object[] children;
239            final String namespaceURI;
240            
241        public XmlList(final String name, final Map attributes, final List body, final String namespaceURI) {
242            super();
243            
244            this.name = name;
245            this.attributes = attributes;
246            this.children = body.toArray();
247            this.namespaceURI = namespaceURI;
248        }
249        
250        public Object getProperty(final String elementName) {
251                    if (elementName.startsWith("@")) {
252                            return this.attributes.get(elementName.substring(1));
253                    } else {
254                    final int indexOfFirst = getNextXmlElement(elementName, -1);
255                    
256                            if (indexOfFirst == -1) { // no elements match the element name
257                                    return new ElementCollection() {
258                                            protected ElementCollection getResult(final String property) {
259                                                    return this;
260                                            }
261    
262                                            /**
263                                             * 
264                                             * Used by the Invoker when it wants to iterate over this object
265                                             * 
266                                             * @return
267                                             */
268                                            public ElementIterator iterator() {
269                                                    return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{-1}) {
270                                                            {
271                                                                    findNextChild();                // set up the element indexes
272                                                            }
273                                                            
274                                                            protected void findNextChild() {
275                                                                    this.nextParentElements[0] = -1;
276                                                            }
277                                                    };
278                                            }
279                                    };
280                            }
281                            
282                            if (getNextXmlElement(elementName, indexOfFirst) == -1) {       // one element matches the element name
283                                    return this.children[indexOfFirst];
284                            } else {                // > 1 element matches the element name
285                                    return new ElementCollection() {
286                                                    protected ElementCollection getResult(final String property) {
287                                                            return new ComplexElementCollection(new XmlList[]{XmlList.this},
288                                                                                                                    new int[] {indexOfFirst},
289                                                                                                                            new String[] {elementName},
290                                                                                                                            property);
291                                                    }
292            
293                                            /**
294                                             * 
295                                             * Used by the Invoker when it wants to iterate over this object
296                                             * 
297                                             * @return
298                                             */
299                                            public ElementIterator iterator() {
300                                                    return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{indexOfFirst}) {
301                                                            protected void findNextChild() {
302                                                                    this.nextParentElements[0] = XmlList.this.getNextXmlElement(elementName, this.nextParentElements[0]);
303                                                            }
304                                                    };
305                                            }
306                                };
307                            }
308                    }
309        }
310        
311        public Object getAt(final int index) {
312                    if (index == 0) {
313                            return this;
314                    } else {
315                            throw new ArrayIndexOutOfBoundsException(index);
316                    }
317            }
318        
319        public int size() {
320                    return 1;
321        }
322    
323        public Object invokeMethod(final String name, final Object args) {
324                    if ("attributes".equals(name)) {
325                            return this.attributes;
326                    } else if ("name".equals(name)) {
327                            return this.name;
328                    } else if ("children".equals(name)) {
329                            return this.children;
330                    } else if ("contents".equals(name)) {
331                            return new Buildable() {
332                                    public void build(GroovyObject builder) {
333                                            buildChildren(builder);
334                                    }
335                            };
336                    } else if ("text".equals(name)) {
337                            return text();
338                    } else if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
339                            return getProperty((String)((Object[])args)[0]);
340                    } else if ("depthFirst".equals(name)) {
341                            //
342                            // TODO: replace this with an iterator
343                            //
344                            
345                            return new GroovyObjectSupport() {
346                                    public Object invokeMethod(final String name, final Object args) {
347                                            if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
348                                                    return getProperty((String)((Object[])args)[0]);
349                                            } else {
350                                                    return XmlList.this.invokeMethod(name, args);
351                                            }
352                                    }
353                                    
354                                    public Object getProperty(final String property) {
355                                            if (property.startsWith("@")) {
356                                                    return XmlList.this.getProperty(property);
357                                            } else {
358                                            final List result = new LinkedList();
359    
360                                                    depthFirstGetProperty(property, XmlList.this.children, result);
361                                                    
362                                                    return result;
363                                            }
364                                    }
365                                    
366                                    private void depthFirstGetProperty(final String property, final Object[] contents, final List result) {
367                                            for (int i = 0; i != contents.length; i++) {
368                                            final Object item = contents[i];
369                                            
370                                                    if (item instanceof XmlList) {
371                                                            if (((XmlList)item).name.equals(property)) {
372                                                                    result.add(item);
373                                                            }
374                                                            
375                                                            depthFirstGetProperty(property, ((XmlList)item).children, result);
376                                                    }
377                                            }
378                                    }
379                            };
380                    } else {
381                            return getMetaClass().invokeMethod(this, name, args);
382                    }
383        }
384        
385            /* (non-Javadoc)
386             * @see groovy.lang.Writable#writeTo(java.io.Writer)
387             */
388            public Writer writeTo(Writer out) throws IOException {
389    
390                    for (int i = 0; i != this.children.length; i++) {
391                    final Object child = this.children[i];
392                    
393                            if (child instanceof String) {
394                                    out.write((String)child);
395                            } else {
396                                    ((XmlList)child).writeTo(out);
397                            }
398                    }
399                    
400                    return out;
401            }
402        
403            /* (non-Javadoc)
404             * @see org.codehaus.groovy.sandbox.markup.Buildable#build(groovy.lang.GroovyObject)
405             */
406            public void build(final GroovyObject builder) {
407                    // TODO handle Namespaces
408            final Closure rest = new Closure(null) {
409                    public Object doCall(final Object o) {
410                            buildChildren(builder);
411                            
412                            return null;
413                    }
414            };
415    
416                    builder.invokeMethod(this.name, new Object[]{this.attributes, rest});
417                    
418            }
419            
420            public String toString() {
421                    return text();
422            }
423            
424            private String text() {
425            final StringBuffer buff = new StringBuffer();
426    
427                    for (int i = 0; i != this.children.length; i++) {
428                    final Object child = this.children[i];
429                    
430                            if (child instanceof String) {
431                                    buff.append(child);
432                            } else {
433                                    buff.append(((XmlList)child).text());
434                            }
435                    }       
436            
437                    return buff.toString();
438            }
439            
440            private void buildChildren(final GroovyObject builder) {
441                    for (int i = 0; i != this.children.length; i++) {
442                            if (this.children[i] instanceof Buildable) {
443                                    ((Buildable)this.children[i]).build(builder);
444                            } else {
445                                    builder.getProperty("mkp");
446                                    builder.invokeMethod("yield", new Object[]{this.children[i]});
447                            }
448                    }
449            }
450    
451            protected int getNextXmlElement(final String name, final int lastFound) {
452                    for (int i = lastFound + 1; i < this.children.length; i++) {
453                    final Object item = this.children[i];
454                            
455                            if (item instanceof XmlList && ((XmlList)item).name.equals(name)) {
456                                    return i;
457                            }
458                    }
459                    
460                    return -1;
461            }
462    }
463    
464    abstract class ElementIterator implements Iterator {
465            protected final XmlList[] parents;
466            protected final int[] nextParentElements;
467            
468            protected ElementIterator(final XmlList[] parents, int[] nextParentElements) {
469                    this.parents = new XmlList[parents.length];
470                    System.arraycopy(parents, 0, this.parents, 0, parents.length);
471                    
472                    this.nextParentElements = new int[nextParentElements.length];
473                    System.arraycopy(nextParentElements, 0, this.nextParentElements, 0, nextParentElements.length);
474            }
475            
476            /* (non-Javadoc)
477             * @see java.util.Iterator#hasNext()
478             */
479            public boolean hasNext() {
480                    return this.nextParentElements[0] != -1;
481            }
482            
483            /* (non-Javadoc)
484             * @see java.util.Iterator#next()
485             */
486            public Object next() {
487            final Object result = this.parents[0].children[this.nextParentElements[0]];
488                            
489                    findNextChild();
490            
491                    return result;
492            }
493            
494            /* (non-Javadoc)
495             * @see java.util.Iterator#remove()
496             */
497            public void remove() {
498                    throw new UnsupportedOperationException();
499            }
500            
501            protected abstract void findNextChild();
502    }
503    
504    abstract class ElementCollection extends GroovyObjectSupport {
505            private int count = -1;
506            
507            public abstract ElementIterator iterator();
508            
509            /* (non-Javadoc)
510             * @see groovy.lang.GroovyObject#getProperty(java.lang.String)
511             */
512            public Object getProperty(final String property) {
513            final ElementCollection result = getResult(property);
514            final Iterator iterator = result.iterator();
515    
516                    if (iterator.hasNext()) {                               
517                            //
518                            // See if there's only one available
519                            //
520                            final Object first = iterator.next();
521                            
522                            if (!iterator.hasNext()) {
523                                    return first;
524                            }
525                    }
526                    
527                    return result;
528            }
529            
530            protected abstract ElementCollection getResult(String property);
531        
532        public synchronized Object getAt(int index) {
533                    if (index >= 0) {
534                    final Iterator iter = iterator();
535                    
536                            while (iter.hasNext()) {
537                                    if (index-- == 0) {
538                                            return iter.next();
539                                    } else {
540                                            iter.next();
541                                    }
542                            }
543                    }
544                    
545                    throw new ArrayIndexOutOfBoundsException(index);
546        }
547        
548            public synchronized int size() {
549                    if (this.count == -1) {
550                    final Iterator iter = iterator();
551                    
552                            this.count = 0;
553                            
554                            while (iter.hasNext()) {
555                                    this.count++;
556                                    iter.next();
557                            }
558                    }
559                    return this.count;
560            }
561    }
562    
563    class ComplexElementCollection extends ElementCollection {
564            private final XmlList[] parents;
565            private final int[] nextParentElements;
566            private final String[] parentElementNames;
567            
568            public ComplexElementCollection(final XmlList[] parents,
569                                                      final int[] nextParentElements,
570                                                                      final String[] parentElementNames,
571                                                                      final String childElementName)
572            {
573                    this.parents = new XmlList[parents.length + 1];
574                    this.parents[0] = (XmlList)parents[0].children[nextParentElements[0]];
575                    System.arraycopy(parents, 0, this.parents, 1, parents.length);
576                    
577                    this.nextParentElements = new int[nextParentElements.length + 1];
578                    this.nextParentElements[0] = -1;        
579                    System.arraycopy(nextParentElements, 0, this.nextParentElements, 1, nextParentElements.length);
580                    
581                    this.parentElementNames = new String[parentElementNames.length + 1];
582                    this.parentElementNames[0] = childElementName;
583                    System.arraycopy(parentElementNames, 0, this.parentElementNames, 1, parentElementNames.length);
584                    
585                    //
586                    // Use the iterator to get the index of the first element
587                    //
588                    
589                    final ElementIterator iter = this.iterator();
590                    
591                    iter.findNextChild();
592                    
593                    this.nextParentElements[0] = iter.nextParentElements[0];
594            }
595            
596            protected ElementCollection getResult(final String property) {
597                    return new ComplexElementCollection(this.parents,
598                                                                                            this.nextParentElements,
599                                                                                            this.parentElementNames,
600                                                                                            property);
601            }
602            
603            /**
604             * 
605             * Used by the Invoker when it wants to iterate over this object
606             * 
607             * @return
608             */
609            public ElementIterator iterator() {
610                    return new ElementIterator(this.parents, this.nextParentElements) {
611                                                    protected void findNextChild() {        
612                                                            this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], this.nextParentElements[0]);
613                                                            
614                                                            while (this.nextParentElements[0] == -1) {
615                                                                    this.parents[0] = findNextParent(1);
616                                                                    
617                                                                    if (this.parents[0] == null) {
618                                                                            return;
619                                                                    } else {
620                                                                            this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], -1);
621                                                                    }
622                                                            }
623                                                    }
624                                                    
625                                                    private XmlList findNextParent(final int i) {
626                                                            if (i == this.nextParentElements.length) return null;
627                                                            
628                                                            this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], this.nextParentElements[i]);
629                                                            
630                                                            while (this.nextParentElements[i] == -1) {
631                                                                    this.parents[i] = findNextParent(i + 1);
632                                                                    
633                                                                    if (this.parents[i] == null) {
634                                                                            return null;
635                                                                    } else {
636                                                                            this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], -1);
637                                                                    }
638                                                            }
639                                                    
640                                                            return (XmlList)this.parents[i].children[this.nextParentElements[i]];
641                                                    }
642                    };
643            }
644    }