001    /*
002     $Id: XmlParser.java,v 1.4 2004/04/15 17:35:14 jstrachan Exp $
003    
004     Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved.
005    
006     Redistribution and use of this software and associated documentation
007     ("Software"), with or without modification, are permitted provided
008     that the following conditions are met:
009    
010     1. Redistributions of source code must retain copyright
011        statements and notices.  Redistributions must also contain a
012        copy of this document.
013    
014     2. Redistributions in binary form must reproduce the
015        above copyright notice, this list of conditions and the
016        following disclaimer in the documentation and/or other
017        materials provided with the distribution.
018    
019     3. The name "groovy" must not be used to endorse or promote
020        products derived from this Software without prior written
021        permission of The Codehaus.  For written permission,
022        please contact info@codehaus.org.
023    
024     4. Products derived from this Software may not be called "groovy"
025        nor may "groovy" appear in their names without prior written
026        permission of The Codehaus. "groovy" is a registered
027        trademark of The Codehaus.
028    
029     5. Due credit should be given to The Codehaus -
030        http://groovy.codehaus.org/
031    
032     THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS
033     ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
034     NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
035     FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
036     THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
037     INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
038     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
039     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
040     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
041     STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
042     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
043     OF THE POSSIBILITY OF SUCH DAMAGE.
044    
045     */
046    package groovy.util;
047    
048    import groovy.xml.QName;
049    
050    import java.io.File;
051    import java.io.FileInputStream;
052    import java.io.IOException;
053    import java.io.InputStream;
054    import java.io.Reader;
055    import java.io.StringReader;
056    import java.security.AccessController;
057    import java.security.PrivilegedActionException;
058    import java.security.PrivilegedExceptionAction;
059    import java.util.ArrayList;
060    import java.util.HashMap;
061    import java.util.List;
062    import java.util.Map;
063    
064    import javax.xml.parsers.ParserConfigurationException;
065    import javax.xml.parsers.SAXParser;
066    import javax.xml.parsers.SAXParserFactory;
067    
068    import org.xml.sax.Attributes;
069    import org.xml.sax.ContentHandler;
070    import org.xml.sax.InputSource;
071    import org.xml.sax.Locator;
072    import org.xml.sax.SAXException;
073    import org.xml.sax.XMLReader;
074    
075    /**
076     * A helper class for parsing XML into a tree of Node instances for 
077     * a simple way of processing XML. This parser does not preserve the
078     * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
079     * This parser ignores comments and processing instructions and converts the
080     * XML into a Node for each element in the XML with attributes
081     * and child Nodes and Strings. This simple model is sufficient for
082     * most simple use cases of processing XML.
083     * 
084     * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
085     * @version $Revision: 1.4 $
086     */
087    public class XmlParser implements ContentHandler {
088    
089        private StringBuffer bodyText = new StringBuffer();
090        private List stack = new ArrayList();
091        private Locator locator;
092        private XMLReader reader;
093        private Node parent;
094        private boolean trimWhitespace = true;
095    
096        public XmlParser() throws ParserConfigurationException, SAXException {
097            this(false, true);
098        }
099    
100        public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
101            SAXParserFactory factory = null;
102            try {
103                            factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
104                                    public Object run() throws ParserConfigurationException {
105                                            return SAXParserFactory.newInstance();
106                                    }
107                            });
108            } catch (PrivilegedActionException pae) {
109                    Exception e = pae.getException();
110                    if (e instanceof ParserConfigurationException) {
111                            throw (ParserConfigurationException) e;
112                    } else {
113                            throw new RuntimeException(e);
114                    }
115            }
116            factory.setNamespaceAware(namespaceAware);
117            factory.setValidating(validating);
118    
119            SAXParser parser = factory.newSAXParser();
120            reader = parser.getXMLReader();
121        }
122    
123        public XmlParser(XMLReader reader) {
124            this.reader = reader;
125        }
126    
127        public XmlParser(SAXParser parser) throws SAXException {
128            reader = parser.getXMLReader();
129        }
130    
131        
132        /**
133         * Parses the content of the given file as XML turning it into a tree
134         * of Nodes
135         */
136        public Node parse(File file) throws IOException, SAXException {
137    
138            InputSource input = new InputSource(new FileInputStream(file));
139            input.setSystemId("file://" + file.getAbsolutePath());
140            getXMLReader().parse(input);
141            return parent;
142    
143        }
144    
145        /**
146         * Parse the content of the specified input source into a tree of Nodes.
147         */
148        public Node parse(InputSource input) throws IOException, SAXException {
149            getXMLReader().parse(input);
150            return parent;
151        }
152    
153        /**
154         * Parse the content of the specified input stream into a tree of Nodes.
155         * Note that using this method will not provide the parser with any URI
156         * for which to find DTDs etc
157         */
158        public Node parse(InputStream input) throws IOException, SAXException {
159            InputSource is = new InputSource(input);
160            getXMLReader().parse(is);
161            return parent;
162        }
163    
164        /**
165         * Parse the content of the specified reader into a tree of Nodes.
166         * Note that using this method will not provide the parser with any URI
167         * for which to find DTDs etc
168         */
169        public Node parse(Reader in) throws IOException, SAXException {
170            InputSource is = new InputSource(in);
171            getXMLReader().parse(is);
172            return parent;
173        }
174    
175        /**
176         * Parse the content of the specified URI into a tree of Nodes
177         */
178        public Node parse(String uri) throws IOException, SAXException {
179            InputSource is = new InputSource(uri);
180            getXMLReader().parse(is);
181            return parent;
182        }
183    
184        /**
185         * A helper method to parse the given text as XML
186         * 
187         * @param text
188         * @return
189         */
190        public Node parseText(String text) throws IOException, SAXException {
191            return parse(new StringReader(text));
192        }
193        
194    
195        // ContentHandler interface
196        //-------------------------------------------------------------------------                    
197        public void startDocument() throws SAXException {
198            parent = null;
199        }
200    
201        public void endDocument() throws SAXException {
202            stack.clear();
203        }
204    
205        public void startElement(String namespaceURI, String localName, String qName, Attributes list)
206            throws SAXException {
207            addTextToNode();
208            
209            Object name = getElementName(namespaceURI, localName, qName);
210    
211            int size = list.getLength();
212            Map attributes = new HashMap(size);
213            for (int i = 0; i < size; i++) {
214                Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
215                String value = list.getValue(i);
216                attributes.put(attributeName, value);
217            }
218            parent = new Node(parent, name, attributes, new ArrayList());
219            stack.add(parent);
220        }
221    
222        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
223            addTextToNode();
224            
225            if (!stack.isEmpty()) {
226                stack.remove(stack.size() - 1);
227                if (!stack.isEmpty()) {
228                    parent = (Node) stack.get(stack.size() - 1);
229                }
230            }
231        }
232    
233        public void characters(char buffer[], int start, int length) throws SAXException {
234            bodyText.append(buffer, start, length);
235        }
236    
237        public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
238        }
239    
240        public void endPrefixMapping(String prefix) throws SAXException {
241        }
242    
243        public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
244        }
245    
246        public void processingInstruction(String target, String data) throws SAXException {
247        }
248    
249        public Locator getDocumentLocator() {
250            return locator;
251        }
252    
253        public void setDocumentLocator(Locator locator) {
254            this.locator = locator;
255        }
256    
257        public void skippedEntity(String name) throws SAXException {
258        }
259    
260        // Implementation methods
261        //-------------------------------------------------------------------------           
262        protected XMLReader getXMLReader() {
263            reader.setContentHandler(this);
264            return reader;
265        }
266    
267        protected void addTextToNode() {
268            String text = bodyText.toString();
269            if (trimWhitespace) {
270                text = text.trim();
271            }
272            if (text.length() > 0) {
273                parent.children().add(text);
274            }
275            bodyText = new StringBuffer();
276        }
277    
278        protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
279            String name = localName;
280            if ((name == null) || (name.length() < 1)) {
281                name = qName;
282            }
283            if (namespaceURI == null || namespaceURI.length() <= 0) {
284                return name;
285            }
286            else {
287                return new QName(namespaceURI, name, qName);
288            }
289        }
290    }