001    /**
002     * The contents of this file are subject to the Mozilla Public License Version 1.1
003     * (the "License"); you may not use this file except in compliance with the License.
004     * You may obtain a copy of the License at http://www.mozilla.org/MPL/
005     * Software distributed under the License is distributed on an "AS IS" basis,
006     * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007     * specific language governing rights and limitations under the License.
008     *
009     * The Original Code is "XMLParser.java".  Description:
010     * "Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
011     * specification."
012     *
013     * The Initial Developer of the Original Code is University Health Network. Copyright (C)
014     * 2002.  All Rights Reserved.
015     *
016     * Contributor(s): ______________________________________.
017     *
018     * Alternatively, the contents of this file may be used under the terms of the
019     * GNU General Public License (the  ???GPL???), in which case the provisions of the GPL are
020     * applicable instead of those above.  If you wish to allow use of your version of this
021     * file only under the terms of the GPL and not to allow others to use your version
022     * of this file under the MPL, indicate your decision by deleting  the provisions above
023     * and replace  them with the notice and other provisions required by the GPL License.
024     * If you do not delete the provisions above, a recipient may use your version of
025     * this file under either the MPL or the GPL.
026     */
027    
028    package ca.uhn.hl7v2.parser;
029    
030    import java.io.File;
031    import java.io.FileReader;
032    import java.io.IOException;
033    import java.io.StringReader;
034    import java.io.StringWriter;
035    import java.util.HashSet;
036    
037    import javax.xml.parsers.DocumentBuilder;
038    import javax.xml.parsers.DocumentBuilderFactory;
039    
040    import org.apache.xerces.parsers.DOMParser;
041    import org.apache.xerces.parsers.StandardParserConfiguration;
042    import org.apache.xml.serialize.OutputFormat;
043    import org.apache.xml.serialize.XMLSerializer;
044    import org.w3c.dom.DOMException;
045    import org.w3c.dom.Document;
046    import org.w3c.dom.Element;
047    import org.w3c.dom.Node;
048    import org.w3c.dom.NodeList;
049    import org.w3c.dom.Text;
050    import org.xml.sax.InputSource;
051    import org.xml.sax.SAXException;
052    
053    import ca.uhn.hl7v2.HL7Exception;
054    import ca.uhn.hl7v2.model.Composite;
055    import ca.uhn.hl7v2.model.DataTypeException;
056    import ca.uhn.hl7v2.model.GenericComposite;
057    import ca.uhn.hl7v2.model.GenericMessage;
058    import ca.uhn.hl7v2.model.GenericPrimitive;
059    import ca.uhn.hl7v2.model.Message;
060    import ca.uhn.hl7v2.model.Primitive;
061    import ca.uhn.hl7v2.model.Segment;
062    import ca.uhn.hl7v2.model.Structure;
063    import ca.uhn.hl7v2.model.Type;
064    import ca.uhn.hl7v2.model.Varies;
065    import ca.uhn.hl7v2.util.Terser;
066    import ca.uhn.log.HapiLog;
067    import ca.uhn.log.HapiLogFactory;
068    
069    /**
070     * Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
071     * specification.  This is an abstract class that handles datatype and segment parsing/encoding,
072     * but not the parsing/encoding of entire messages.  To use the XML parser, you should create a
073     * subclass for a certain message structure.  This subclass must be able to identify the Segment
074     * objects that correspond to various Segment nodes in an XML document, and call the methods <code>
075     * parse(Segment segment, ElementNode segmentNode)</code> and <code>encode(Segment segment, ElementNode segmentNode)
076     * </code> as appropriate.  XMLParser uses the Xerces parser, which must be installed in your classpath.
077     * @author Bryan Tripp, Shawn Bellina
078     */
079    public abstract class XMLParser extends Parser {
080    
    /** Logger shared by all instances of this class. */
    private static final HapiLog log = HapiLogFactory.getHapiLog(XMLParser.class);

    /**
     * Xerces DOM parser created in the constructor and reused by every call to doParse(),
     * which synchronizes on this object while parsing and fetching the resulting document.
     */
    private DOMParser parser;
    /**
     * Character encoding handed to the serializer's OutputFormat in doEncode(Message).
     * When null, the OutputFormat's encoding is not overridden.
     */
    private String textEncoding;

    /**
     * The nodes whose names match these strings will be kept as original, 
     * meaning that no white space trimming will occur on them
     */
    private String[] keepAsOriginalNodes;

    /**
     * All keepAsOriginalNodes names, concatenated by a pipe (|)
     */
    private String concatKeepAsOriginalNodes = "";
096    
    /**
     * Constructor.  Delegates to {@link #XMLParser(ModelClassFactory)}, passing
     * null as the factory.
     */
    public XMLParser() {
        this(null);
    }
101    
102        /** 
103         * Constructor
104         *  
105         * @param theFactory custom factory to use for model class lookup 
106         */
107        public XMLParser(ModelClassFactory theFactory) {
108            parser = new DOMParser(new StandardParserConfiguration());
109            try {
110                parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
111            }
112            catch (Exception e) {
113                log.error("Can't exclude whitespace from XML DOM", e);
114            }
115        }
116        
117        /**
118         * Returns a String representing the encoding of the given message, if
119         * the encoding is recognized.  For example if the given message appears
120         * to be encoded using HL7 2.x XML rules then "XML" would be returned.
121         * If the encoding is not recognized then null is returned.  That this
122         * method returns a specific encoding does not guarantee that the
123         * message is correctly encoded (e.g. well formed XML) - just that
124         * it is not encoded using any other encoding than the one returned.
125         * Returns null if the encoding is not recognized.
126         */
127        public String getEncoding(String message) {
128            String encoding = null;
129    
130            //check for a number of expected strings 
131            String[] expected = { "<MSH.1", "<MSH.2", "</MSH>" };
132            boolean isXML = true;
133            for (int i = 0; i < expected.length; i++) {
134                if (message.indexOf(expected[i]) < 0)
135                    isXML = false;
136            }
137            if (isXML)
138                encoding = "XML";
139    
140            return encoding;
141        }
142    
143        /**
144         * Returns true if and only if the given encoding is supported
145         * by this Parser.
146         */
147        public boolean supportsEncoding(String encoding) {
148            if (encoding.equals("XML")) {
149                return true;
150            }
151            else {
152                return false;
153            }
154        }
155    
    /**
     * Returns the preferred encoding of this Parser.
     *
     * @return the preferred encoding of this Parser, which is always "XML"
     */
    public String getDefaultEncoding() {
        return "XML";
    }
162        
163        /**
164         * Sets the <i>keepAsOriginalNodes<i>
165         * 
166         * The nodes whose names match the <i>keepAsOriginalNodes<i> will be kept as original, 
167         * meaning that no white space treaming will occur on them
168         */
169        public void setKeepAsOriginalNodes(String[] keepAsOriginalNodes) {
170            this.keepAsOriginalNodes = keepAsOriginalNodes;
171    
172            if (keepAsOriginalNodes.length != 0) {
173                //initializes the         
174                StringBuffer strBuf = new StringBuffer(keepAsOriginalNodes[0]);
175                for (int i = 1; i < keepAsOriginalNodes.length; i++) {
176                    strBuf.append("|");
177                    strBuf.append(keepAsOriginalNodes[i]);
178                }
179                concatKeepAsOriginalNodes = strBuf.toString();
180            }
181            else {
182                concatKeepAsOriginalNodes = "";
183            }
184        }
185    
    /**
     * Returns the <i>keepAsOriginalNodes</i>.
     *
     * @return the array most recently passed to setKeepAsOriginalNodes (the same
     *      array instance, not a copy), or null if none has been set
     */
    public String[] getKeepAsOriginalNodes() {
        return keepAsOriginalNodes;
    }
192    
    /**
     * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
     * <p>The easiest way to implement this method for a particular message structure is as follows:
     * <ol><li>Create an instance of the Message type you are going to handle with your subclass
     * of XMLParser</li>
     * <li>Go through the given Document and find the Elements that represent the top level of
     * each message segment. </li>
     * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
     * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
     * At the end of this process, your Message object should be populated with data from the XML
     * Document.</p>
     * @param XMLMessage the DOM Document holding the XML-encoded message
     * @param version the version string that doParse() received and passes through unchanged
     * @return the populated Message
     * @throws HL7Exception if the message is not correctly formatted.
     * @throws EncodingNotSupportedException if the message encoded
     *      is not supported by this parser.
     */
    public abstract Message parseDocument(Document XMLMessage, String version) throws HL7Exception;
209    
210        /**
211         * <p>Parses a message string and returns the corresponding Message
212         * object.  This method checks that the given message string is XML encoded, creates an
213         * XML Document object (using Xerces) from the given String, and calls the abstract
214         * method <code>parse(Document XMLMessage)</code></p>
215         */
216        protected Message doParse(String message, String version) throws HL7Exception, EncodingNotSupportedException {
217            Message m = null;
218    
219            //parse message string into a DOM document 
220            try {
221                Document doc = null;
222                synchronized (this) {
223                    parser.parse(new InputSource(new StringReader(message)));
224                    doc = parser.getDocument();
225                }
226                m = parseDocument(doc, version);
227            }
228            catch (SAXException e) {
229                throw new HL7Exception("SAXException parsing XML", HL7Exception.APPLICATION_INTERNAL_ERROR, e);
230            }
231            catch (IOException e) {
232                throw new HL7Exception("IOException parsing XML", HL7Exception.APPLICATION_INTERNAL_ERROR, e);
233            }
234    
235            return m;
236        }
237    
238        /**
239         * Formats a Message object into an HL7 message string using the given
240         * encoding.
241         * @throws HL7Exception if the data fields in the message do not permit encoding
242         *      (e.g. required fields are null)
243         * @throws EncodingNotSupportedException if the requested encoding is not
244         *      supported by this parser.
245         */
246        protected String doEncode(Message source, String encoding) throws HL7Exception, EncodingNotSupportedException {
247            if (!encoding.equals("XML"))
248                throw new EncodingNotSupportedException("XMLParser supports only XML encoding");
249            return encode(source);
250        }
251    
252        /**
253         * Formats a Message object into an HL7 message string using this parser's
254         * default encoding (XML encoding). This method calls the abstract method
255         * <code>encodeDocument(...)</code> in order to obtain XML Document object
256         * representation of the Message, then serializes it to a String.
257         * @throws HL7Exception if the data fields in the message do not permit encoding
258         *      (e.g. required fields are null)
259         */
260        protected String doEncode(Message source) throws HL7Exception {
261            if (source instanceof GenericMessage) {
262                throw new HL7Exception("Can't XML-encode a GenericMessage.  Message must have a recognized structure.");
263            }
264            
265            Document doc = encodeDocument(source);
266            doc.getDocumentElement().setAttribute("xmlns", "urn:hl7-org:v2xml");
267            
268            StringWriter out = new StringWriter();
269    
270            OutputFormat outputFormat = new OutputFormat("", null, true);
271            if (textEncoding != null) {
272                    outputFormat.setEncoding(textEncoding);
273            }
274            
275            XMLSerializer ser = new XMLSerializer(out, outputFormat); //default output format
276            try {
277                ser.serialize(doc);
278            }
279            catch (IOException e) {
280                throw new HL7Exception(
281                    "IOException serializing XML document to string",
282                    HL7Exception.APPLICATION_INTERNAL_ERROR,
283                    e);
284            }
285            return out.toString();
286        }
287    
    /**
     * <p>Creates an XML Document that corresponds to the given Message object. </p>
     * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
     * into it that correspond to the groups and segments that belong to the message type that your subclass
     * of XMLParser supports.  Then, for each segment in the message, call the method
     * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
     * that segment and the corresponding Segment object from the given Message.</p>
     * @param source the message to convert
     * @return the XML Document for the message; doEncode(Message) calls
     *      getDocumentElement() on it, so it needs a document element
     * @throws HL7Exception if the message cannot be encoded
     */
    public abstract Document encodeDocument(Message source) throws HL7Exception;
297    
298        /** 
299         * Populates the given Segment object with data from the given XML Element.
300         * @throws HL7Exception if the XML Element does not have the correct name and structure
301         *      for the given Segment, or if there is an error while setting individual field values.
302         */
303        public void parse(Segment segmentObject, Element segmentElement) throws HL7Exception {
304            HashSet done = new HashSet();
305            
306    //        for (int i = 1; i <= segmentObject.numFields(); i++) {
307    //            String elementName = makeElementName(segmentObject, i);
308    //            done.add(elementName);
309    //            parseReps(segmentObject, segmentElement, elementName, i);
310    //        }
311            
312            NodeList all = segmentElement.getChildNodes();
313            for (int i = 0; i < all.getLength(); i++) {
314                String elementName = all.item(i).getNodeName();
315                if (all.item(i).getNodeType() == Node.ELEMENT_NODE && !done.contains(elementName)) {
316                    done.add(elementName);
317                    
318                    int index = elementName.indexOf('.');
319                    if (index >= 0 && elementName.length() > index) { //properly formatted element
320                        String fieldNumString = elementName.substring(index + 1);
321                        int fieldNum = Integer.parseInt(fieldNumString);
322                        parseReps(segmentObject, segmentElement, elementName, fieldNum);                        
323                    } else {                        
324                        log.debug("Child of segment " + segmentObject.getName() 
325                                + " doesn't look like a field: " + elementName);
326                    }
327                }
328            }
329    
330            //set data type of OBX-5        
331            if (segmentObject.getClass().getName().indexOf("OBX") >= 0) {
332                Varies.fixOBX5(segmentObject, getFactory());
333            }
334        }
335        
336        private void parseReps(Segment segmentObject, Element segmentElement, String fieldName, int fieldNum) 
337                 throws DataTypeException, HL7Exception {
338            
339            NodeList reps = segmentElement.getElementsByTagName(fieldName);
340            for (int i = 0; i < reps.getLength(); i++) {
341                parse(segmentObject.getField(fieldNum, i), (Element) reps.item(i));
342            }        
343        }
344    
345        /**
346         * Populates the given Element with data from the given Segment, by inserting
347         * Elements corresponding to the Segment's fields, their components, etc.  Returns 
348         * true if there is at least one data value in the segment.   
349         */
350        public boolean encode(Segment segmentObject, Element segmentElement) throws HL7Exception {
351            boolean hasValue = false;
352            int n = segmentObject.numFields();
353            for (int i = 1; i <= n; i++) {
354                String name = makeElementName(segmentObject, i);
355                Type[] reps = segmentObject.getField(i);
356                for (int j = 0; j < reps.length; j++) {
357                    Element newNode = segmentElement.getOwnerDocument().createElement(name);
358                    boolean componentHasValue = encode(reps[j], newNode);
359                    if (componentHasValue) {
360                        try {
361                            segmentElement.appendChild(newNode);
362                        }
363                        catch (DOMException e) {
364                            throw new HL7Exception(
365                                "DOMException encoding Segment: ",
366                                HL7Exception.APPLICATION_INTERNAL_ERROR,
367                                e);
368                        }
369                        hasValue = true;
370                    }
371                }
372            }
373            return hasValue;
374        }
375    
376        /**
377         * Populates the given Type object with data from the given XML Element.
378         */
379        public void parse(Type datatypeObject, Element datatypeElement) throws DataTypeException {
380            if (datatypeObject instanceof Varies) {
381                parseVaries((Varies) datatypeObject, datatypeElement);
382            }
383            else if (datatypeObject instanceof Primitive) {
384                parsePrimitive((Primitive) datatypeObject, datatypeElement);
385            }
386            else if (datatypeObject instanceof Composite) {
387                parseComposite((Composite) datatypeObject, datatypeElement);
388            }
389        }
390    
391        /**
392         * Parses an XML element into a Varies by determining whether the element is primitive or 
393         * composite, calling setData() on the Varies with a new generic primitive or composite as appropriate, 
394         * and then calling parse again with the new Type object.  
395         */
396        private void parseVaries(Varies datatypeObject, Element datatypeElement) throws DataTypeException {
397            //figure out what data type it holds 
398            //short nodeType = datatypeElement.getFirstChild().getNodeType();        
399            if (!hasChildElement(datatypeElement)) {
400                //it's a primitive 
401                datatypeObject.setData(new GenericPrimitive(datatypeObject.getMessage()));
402            }
403            else {
404                //it's a composite ... almost know what type, except that we don't have the version here 
405                datatypeObject.setData(new GenericComposite(datatypeObject.getMessage()));
406            }
407            parse(datatypeObject.getData(), datatypeElement);
408        }
409    
410        /** Returns true if any of the given element's children are elements */
411        private boolean hasChildElement(Element e) {
412            NodeList children = e.getChildNodes();
413            boolean hasElement = false;
414            int c = 0;
415            while (c < children.getLength() && !hasElement) {
416                if (children.item(c).getNodeType() == Node.ELEMENT_NODE) {
417                    hasElement = true;
418                }
419                c++;
420            }
421            return hasElement;
422        }
423    
424        /** Parses a primitive type by filling it with text child, if any */
425        private void parsePrimitive(Primitive datatypeObject, Element datatypeElement) throws DataTypeException {
426            NodeList children = datatypeElement.getChildNodes();
427            int c = 0;
428            boolean full = false;
429            while (c < children.getLength() && !full) {
430                Node child = children.item(c++);
431                if (child.getNodeType() == Node.TEXT_NODE) {
432                    try {
433                        if (child.getNodeValue() != null && !child.getNodeValue().equals("")) {
434                            if (keepAsOriginal(child.getParentNode())) {
435                                datatypeObject.setValue(child.getNodeValue());
436                            }
437                            else {
438                                datatypeObject.setValue(removeWhitespace(child.getNodeValue()));
439                            }
440                        }
441                    }
442                    catch (DOMException e) {
443                        log.error("Error parsing primitive value from TEXT_NODE", e);
444                    }
445                    full = true;
446                }
447            }
448        }
449    
450        /**
451         * Checks if <code>Node</code> content should be kept as original (ie.: whitespaces won't be removed)
452         * 
453         * @param node The target <code>Node</code> 
454         * @return boolean <code>true</code> if whitespaces should not be removed from node content, 
455         *                 <code>false</code> otherwise
456         */
457        protected boolean keepAsOriginal(Node node) {
458            if (node.getNodeName() == null)
459                return false;
460            return concatKeepAsOriginalNodes.indexOf(node.getNodeName()) != -1;
461        }
462    
463        /** 
464         * Removes all unecessary whitespace from the given String (intended to be used with Primitive values).  
465         * This includes leading and trailing whitespace, and repeated space characters.  Carriage returns, 
466         * line feeds, and tabs are replaced with spaces. 
467         */
468        protected String removeWhitespace(String s) {
469            s = s.replace('\r', ' ');
470            s = s.replace('\n', ' ');
471            s = s.replace('\t', ' ');
472    
473            boolean repeatedSpacesExist = true;
474            while (repeatedSpacesExist) {
475                int loc = s.indexOf("  ");
476                if (loc < 0) {
477                    repeatedSpacesExist = false;
478                }
479                else {
480                    StringBuffer buf = new StringBuffer();
481                    buf.append(s.substring(0, loc));
482                    buf.append(" ");
483                    buf.append(s.substring(loc + 2));
484                    s = buf.toString();
485                }
486            }
487            return s.trim();
488        }
489    
490        /**
491         * Populates a Composite type by looping through it's children, finding corresponding 
492         * Elements among the children of the given Element, and calling parse(Type, Element) for
493         * each.
494         */
495        private void parseComposite(Composite datatypeObject, Element datatypeElement) throws DataTypeException {
496            if (datatypeObject instanceof GenericComposite) { //elements won't be named GenericComposite.x
497                NodeList children = datatypeElement.getChildNodes();
498                int compNum = 0;
499                for (int i = 0; i < children.getLength(); i++) {
500                    if (children.item(i).getNodeType() == Node.ELEMENT_NODE) {
501                        parse(datatypeObject.getComponent(compNum), (Element) children.item(i));
502                        compNum++;
503                    }
504                }
505            }
506            else {
507                Type[] children = datatypeObject.getComponents();
508                for (int i = 0; i < children.length; i++) {
509                    NodeList matchingElements =
510                        datatypeElement.getElementsByTagName(makeElementName(datatypeObject, i + 1));
511                    if (matchingElements.getLength() > 0) {
512                        parse(children[i], (Element) matchingElements.item(0)); //components don't repeat - use 1st
513                    }
514                }
515            }
516        }
517    
518        /** 
519         * Returns the expected XML element name for the given child of a message constituent 
520         * of the given class (the class should be a Composite or Segment class). 
521         */
522        /*private String makeElementName(Class c, int child) {
523            String longClassName = c.getName();
524            String shortClassName = longClassName.substring(longClassName.lastIndexOf('.') + 1, longClassName.length());
525            if (shortClassName.startsWith("Valid")) {
526                shortClassName = shortClassName.substring(5, shortClassName.length());
527            }
528            return shortClassName + "." + child;
529        }*/
530    
    /** 
     * Returns the expected XML element name for the given child of the given Segment: 
     * the segment's name, a ".", and the given child number. 
     */
    private String makeElementName(Segment s, int child) {
        return s.getName() + "." + child;
    }
535    
    /** 
     * Returns the expected XML element name for the given child of the given Composite: 
     * the composite's name, a ".", and the given child number. 
     */
    private String makeElementName(Composite composite, int child) {
        return composite.getName() + "." + child;
    }
540    
541        /**
542         * Populates the given Element with data from the given Type, by inserting
543         * Elements corresponding to the Type's components and values.  Returns true if 
544         * the given type contains a value (i.e. for Primitives, if getValue() doesn't 
545         * return null, and for Composites, if at least one underlying Primitive doesn't 
546         * return null).
547         */
548        private boolean encode(Type datatypeObject, Element datatypeElement) throws DataTypeException {
549            boolean hasData = false;
550            if (datatypeObject instanceof Varies) {
551                hasData = encodeVaries((Varies) datatypeObject, datatypeElement);
552            }
553            else if (datatypeObject instanceof Primitive) {
554                hasData = encodePrimitive((Primitive) datatypeObject, datatypeElement);
555            }
556            else if (datatypeObject instanceof Composite) {
557                hasData = encodeComposite((Composite) datatypeObject, datatypeElement);
558            }
559            return hasData;
560        }
561    
562        /**
563         * Encodes a Varies type by extracting it's data field and encoding that.  Returns true 
564         * if the data field (or one of its components) contains a value.  
565         */
566        private boolean encodeVaries(Varies datatypeObject, Element datatypeElement) throws DataTypeException {
567            boolean hasData = false;
568            if (datatypeObject.getData() != null) {
569                hasData = encode(datatypeObject.getData(), datatypeElement);
570            }
571            return hasData;
572        }
573    
574        /** 
575         * Encodes a Primitive in XML by adding it's value as a child of the given Element.  
576         * Returns true if the given Primitive contains a value.  
577         */
578        private boolean encodePrimitive(Primitive datatypeObject, Element datatypeElement) throws DataTypeException {
579            boolean hasValue = false;
580            if (datatypeObject.getValue() != null && !datatypeObject.getValue().equals(""))
581                hasValue = true;
582    
583            Text t = datatypeElement.getOwnerDocument().createTextNode(datatypeObject.getValue());
584            if (hasValue) {
585                try {
586                    datatypeElement.appendChild(t);
587                }
588                catch (DOMException e) {
589                    throw new DataTypeException("DOMException encoding Primitive: ", e);
590                }
591            }
592            return hasValue;
593        }
594    
595        /**
596         * Encodes a Composite in XML by looping through it's components, creating new 
597         * children for each of them (with the appropriate names) and populating them by 
598         * calling encode(Type, Element) using these children.  Returns true if at least 
599         * one component contains a value.  
600         */
601        private boolean encodeComposite(Composite datatypeObject, Element datatypeElement) throws DataTypeException {
602            Type[] components = datatypeObject.getComponents();
603            boolean hasValue = false;
604            for (int i = 0; i < components.length; i++) {
605                String name = makeElementName(datatypeObject, i + 1);
606                Element newNode = datatypeElement.getOwnerDocument().createElement(name);
607                boolean componentHasValue = encode(components[i], newNode);
608                if (componentHasValue) {
609                    try {
610                        datatypeElement.appendChild(newNode);
611                    }
612                    catch (DOMException e) {
613                        throw new DataTypeException("DOMException encoding Composite: ", e);
614                    }
615                    hasValue = true;
616                }
617            }
618            return hasValue;
619        }
620    
    /**
     * <p>Returns a minimal amount of data from a message string, including only the
     * data needed to send a response to the remote system.  This includes the
     * following fields:
     * <ul><li>field separator</li>
     * <li>encoding characters</li>
     * <li>processing ID</li>
     * <li>message control ID</li></ul>
     * This method is intended for use when there is an error parsing a message,
     * (so the Message object is unavailable) but an error message must be sent
     * back to the remote system including some of the information in the inbound
     * message.  This method parses only that required information, hopefully
     * avoiding the condition that caused the original error.</p>
     * <p>The values are read from the raw message text with parseLeaf() (tags MSH.1,
     * MSH.2, MSH.10 and MSH.11, falling back to PT.1 for the processing ID) and
     * written into fields 1, 2, 10 and 11 of a control MSH segment created for the
     * message's version.</p>
     * @param message the XML-encoded message text
     * @return the control MSH segment holding the extracted values
     * @throws HL7Exception if the version or one of the values cannot be extracted,
     *      or if a value cannot be set on the segment
     */
    public Segment getCriticalResponseData(String message) throws HL7Exception {
        String version = getVersion(message);
        Segment criticalData = Parser.makeControlMSH(version, getFactory());

        //each value is extracted and stored before the next one is looked up
        Terser.set(criticalData, 1, 0, 1, 1, parseLeaf(message, "MSH.1", 0));
        Terser.set(criticalData, 2, 0, 1, 1, parseLeaf(message, "MSH.2", 0));
        Terser.set(criticalData, 10, 0, 1, 1, parseLeaf(message, "MSH.10", 0));
        String procID = parseLeaf(message, "MSH.11", 0);
        if (procID == null || procID.length() == 0) {
            //no direct text in MSH.11: search for PT.1, starting at the position of "MSH.11"
            procID = parseLeaf(message, "PT.1", message.indexOf("MSH.11"));
            //this field is a composite in later versions
        }
        Terser.set(criticalData, 11, 0, 1, 1, procID);

        return criticalData;
    }
651    
652        /**
653         * For response messages, returns the value of MSA-2 (the message ID of the message
654         * sent by the sending system).  This value may be needed prior to main message parsing,
655         * so that (particularly in a multi-threaded scenario) the message can be routed to
656         * the thread that sent the request.  We need this information first so that any
657         * parse exceptions are thrown to the correct thread.  Implementers of Parsers should
658         * take care to make the implementation of this method very fast and robust.
659         * Returns null if MSA-2 can not be found (e.g. if the message is not a
660         * response message).  Trims whitespace from around the MSA-2 field.  
661         */
662        public String getAckID(String message) {
663            String ackID = null;
664            try {
665                ackID = parseLeaf(message, "msa.2", 0).trim();
666            }
667            catch (HL7Exception e) { /* OK ... assume it isn't a response message */
668            }
669            return ackID;
670        }
671    
672        public String getVersion(String message) throws HL7Exception {
673            String version = parseLeaf(message, "MSH.12", 0);
674            if (version == null || version.trim().length() == 0) {
675                version = parseLeaf(message, "VID.1", message.indexOf("MSH.12"));
676            }
677            return version;
678        }
679    
680        /**
681         * Attempts to retrieve the value of a leaf tag without using DOM or SAX.  
682         * This method searches the given message string for the given tag name, and returns 
683         * everything after the given tag and before the start of the next tag.  Whitespace
684         * is stripped.  This is intended only for lead nodes, as the value is considered to 
685         * end at the start of the next tag, regardless of whether it is the matching end 
686         * tag or some other nested tag.  
687         * @param message a string message in XML form
688         * @param tagName the name of the XML tag, e.g. "MSA.2"
689         * @param startAt the character location at which to start searching
690         * @throws HL7Exception if the tag can not be found
691         */
692        protected String parseLeaf(String message, String tagName, int startAt) throws HL7Exception {
693            String value = null;
694    
695            int tagStart = message.indexOf("<" + tagName, startAt);
696            if (tagStart < 0)
697                tagStart = message.indexOf("<" + tagName.toUpperCase(), startAt);
698            int valStart = message.indexOf(">", tagStart) + 1;
699            int valEnd = message.indexOf("<", valStart);
700    
701            if (tagStart >= 0 && valEnd >= valStart) {
702                value = message.substring(valStart, valEnd);
703            }
704            else {
705                throw new HL7Exception(
706                    "Couldn't find "
707                        + tagName
708                        + " in message beginning: "
709                        + message.substring(0, Math.min(150, message.length())),
710                    HL7Exception.REQUIRED_FIELD_MISSING);
711            }
712    
713            // Escape codes, as defined at http://hdf.ncsa.uiuc.edu/HDF5/XML/xml_escape_chars.htm
714            value = value.replaceAll("&quot;", "\"");
715            value = value.replaceAll("&apos;", "'");
716            value = value.replaceAll("&amp;", "&");
717            value = value.replaceAll("&lt;", "<");
718            value = value.replaceAll("&gt;", ">");
719    
720            return value;
721        }
722    
723        /**
724         * Throws unsupported operation exception
725         *
726         * @throws Unsupported operation exception
727         */
728        @Override
729        public String doEncode(Segment structure, EncodingCharacters encodingCharacters) throws HL7Exception {
730            throw new UnsupportedOperationException("Not supported yet.");
731        }
732    
733        /**
734         * Throws unsupported operation exception
735         *
736         * @throws Unsupported operation exception
737         */
738        @Override
739        public void parse(Message message, String string) throws HL7Exception {
740            throw new UnsupportedOperationException("Not supported yet.");
741        }
742    
743    
744        /**
745         * Throws unsupported operation exception
746         *
747         * @throws Unsupported operation exception
748         */
749        @Override
750        public String doEncode(Type type, EncodingCharacters encodingCharacters) throws HL7Exception {
751            throw new UnsupportedOperationException("Not supported yet.");
752        }
753    
754        /**
755         * Throws unsupported operation exception
756         *
757         * @throws Unsupported operation exception
758         */
759        @Override
760        public void parse(Type type, String string, EncodingCharacters encodingCharacters) throws HL7Exception {
761            throw new UnsupportedOperationException("Not supported yet.");
762        }
763    
764        /**
765         * Throws unsupported operation exception
766         *
767         * @throws Unsupported operation exception
768         */
769        @Override
770        public void parse(Segment segment, String string, EncodingCharacters encodingCharacters) throws HL7Exception {
771            throw new UnsupportedOperationException("Not supported yet.");
772        }
773    
774    
775        /** Test harness */
776        public static void main(String args[]) {
777            if (args.length != 1) {
778                System.out.println("Usage: XMLParser pipe_encoded_file");
779                System.exit(1);
780            }
781    
782            //read and parse message from file 
783            try {
784                PipeParser parser = new PipeParser();
785                File messageFile = new File(args[0]);
786                long fileLength = messageFile.length();
787                FileReader r = new FileReader(messageFile);
788                char[] cbuf = new char[(int) fileLength];
789                System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
790                r.close();
791                String messString = String.valueOf(cbuf);
792                Message mess = parser.parse(messString);
793                System.out.println("Got message of type " + mess.getClass().getName());
794    
795                ca.uhn.hl7v2.parser.XMLParser xp = new XMLParser() {
796                    public Message parseDocument(Document XMLMessage, String version) throws HL7Exception {
797                        return null;
798                    }
799                    public Document encodeDocument(Message source) throws HL7Exception {
800                        return null;
801                    }
802                    public String getVersion(String message) throws HL7Exception {
803                        return null;
804                    }
805    
806                    @Override
807                    public String doEncode(Segment structure, EncodingCharacters encodingCharacters) throws HL7Exception {
808                        throw new UnsupportedOperationException("Not supported yet.");
809                    }
810    
811                    @Override
812                    public String doEncode(Type type, EncodingCharacters encodingCharacters) throws HL7Exception {
813                        throw new UnsupportedOperationException("Not supported yet.");
814                    }
815    
816                    @Override
817                    public void parse(Type type, String string, EncodingCharacters encodingCharacters) throws HL7Exception {
818                        throw new UnsupportedOperationException("Not supported yet.");
819                    }
820    
821                    @Override
822                    public void parse(Segment segment, String string, EncodingCharacters encodingCharacters) throws HL7Exception {
823                        throw new UnsupportedOperationException("Not supported yet.");
824                    }
825                };
826    
827                //loop through segment children of message, encode, print to console
828                String[] structNames = mess.getNames();
829                for (int i = 0; i < structNames.length; i++) {
830                    Structure[] reps = mess.getAll(structNames[i]);
831                    for (int j = 0; j < reps.length; j++) {
832                        if (Segment.class.isAssignableFrom(reps[j].getClass())) { //ignore groups
833                            DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
834                            Document doc = docBuilder.newDocument(); //new doc for each segment
835                            Element root = doc.createElement(reps[j].getClass().getName());
836                            doc.appendChild(root);
837                            xp.encode((Segment) reps[j], root);
838                            StringWriter out = new StringWriter();
839                            XMLSerializer ser = new XMLSerializer(out, null); //default output format
840                            ser.serialize(doc);
841                            System.out.println("Segment " + reps[j].getClass().getName() + ": \r\n" + out.toString());
842    
843                            Class[] segmentConstructTypes = { Message.class };
844                            Object[] segmentConstructArgs = { null };
845                            Segment s =
846                                (Segment) reps[j].getClass().getConstructor(segmentConstructTypes).newInstance(
847                                    segmentConstructArgs);
848                            xp.parse(s, root);
849                            Document doc2 = docBuilder.newDocument();
850                            Element root2 = doc2.createElement(s.getClass().getName());
851                            doc2.appendChild(root2);
852                            xp.encode(s, root2);
853                            StringWriter out2 = new StringWriter();
854                            ser = new XMLSerializer(out2, null); //default output format
855                            ser.serialize(doc2);
856                            if (out2.toString().equals(out.toString())) {
857                                System.out.println("Re-encode OK");
858                            }
859                            else {
860                                System.out.println(
861                                    "Warning: XML different after parse and re-encode: \r\n" + out2.toString());
862                            }
863                        }
864                    }
865                }
866    
867            }
868            catch (Exception e) {
869                e.printStackTrace();
870            }
871        }
872    
873        /**
874         * Returns the text encoding to be used in generating new messages. Note that this affects encoding to string only, not parsing.
875         * @return
876         */
877            public String getTextEncoding() {
878                    return textEncoding;
879            }
880    
881            /**
882             * Sets the text encoding to be used in generating new messages. Note that this affects encoding to string only, not parsing.
883             * @param textEncoding The encoding. Default is the platform default.
884             */
885            public void setTextEncoding(String textEncoding) {
886                    this.textEncoding = textEncoding;
887            }
888    
889    }