001    package ca.uhn.hl7v2.preparser;
002    
003    import java.lang.reflect.* ;
004    import java.util.* ;
005    import ca.uhn.hl7v2.parser.* ;
006    
007    /*
The point of this class (all static members, not instantiable) is to take a
traditionally-encoded HL7 message and add all its contents to a Properties
object, via the parseMessage() method.
011    
The key-value pairs added to the Properties argument have keys that represent a
datum's location in the message.  (in the ZYX-1-2[0] style.  TODO: define
exactly.)  See DatumPath, particularly the toString() of that class.
Anyway, the Properties keys are those and the values are the tokens found.
016    
Note: we accept useless field repetition separators at the end of a
field repetition sequence, e.g. |855-4545~555-3792~~~| , and interpret this
as defining repetitions 0 and 1.  This might not be allowed.  (HL7 2.3.1
section 2.10 explicitly allows this behaviour for fields / components /
subcomponents, but the allowance is notably absent for repetitions.  TODO:
nail down.)  We allow it anyway.
023    
024    Also, we accept things like |855-4545~~555-3792|, and interpret it as defining
025    repetitions 0 and 2.  The spec would seem to disallow this too, but there's no
026    harm.  :D  
027    */
028    
029    public class ER7 {
030            
031            private ER7() {}
032    
033            /** characters that delimit segments.  for use with StringTokenizer.
034            We are forgiving: HL7 2.3.1 section 2.7 says that carriage return ('\r') is
035            the only segment delimiter.  TODO: check other versions. */ 
036            static final String segmentSeparators = "\r\n\f";
037    
038            /** Parses message and dumps contents to props, with keys in the 
039            ZYX[a]-b[c]-d-e style.
040            */
041            public static boolean parseMessage(/*out*/ Properties props, 
042                    /*in*/ Vector /*<DatumPath>*/ msgMask, /*in*/ String message)
043            {
044                    boolean ok = false;
045                    if(message != null) {
046                            if(props == null)
047                                    props = new Properties();
048    
049                            StringTokenizer messageTokenizer 
050                                    = new StringTokenizer(message, segmentSeparators);
051                            if(messageTokenizer.hasMoreTokens()) {
052                                    String firstSegment = messageTokenizer.nextToken();
053                                    EncodingCharacters encodingChars = new EncodingCharacters('0', "0000");
054                                    if(parseMSHSegmentWhole(props, msgMask, encodingChars, firstSegment)) {
055                                            ok = true;
056                                            TreeMap /*<String -> Integer>*/ segmentId2nextRepIdx = new TreeMap();
057                                            segmentId2nextRepIdx.put(new String("MSH"), new Integer(1)); 
058                                                    // in case we find another MSH segment, heh.
059                                            while(messageTokenizer.hasMoreTokens()) {
060                                                    parseSegmentWhole(props, segmentId2nextRepIdx, 
061                                                            msgMask, encodingChars, messageTokenizer.nextToken());
062                                            }
063                                    }
064                            }
065                    }
066                    return ok;
067            }
068            
069            /** given segment, starting with "MSH", then encoding characters, etc...
070            put MSH[0]-1[0]-1-1 (== MSH-1) and MSH[0]-2[0]-1-1 (== MSH-2) into props, if found,
071            plus everything else found in 'segment' */
072            protected static boolean parseMSHSegmentWhole(/*out*/ Properties props, 
073                    /*in*/ Vector /*<DatumPath>*/ msgMask, /*in*/ EncodingCharacters encodingChars, 
074                    /*in*/ String segment) 
075            {
076                    boolean ret = false;
077                    try {
078                            ER7SegmentHandler handler = new ER7SegmentHandler();
079                            handler.m_props = props;
080                            handler.m_encodingChars = encodingChars;
081                            handler.m_segmentId = "MSH";
082                            handler.m_segmentRepIdx = 0;
083                            if(msgMask != null)
084                                    handler.m_msgMask = msgMask;
085                            else {
086                                    handler.m_msgMask = new Vector();
087                                    handler.m_msgMask.add(new DatumPath()); // everything will pass this
088                                            // (every DatumPath startsWith the zero-length DatumPath)
089                            }
090    
091                            encodingChars.setFieldSeparator(segment.charAt(3));
092                            Vector /*<Integer>*/ nodeKey = new Vector();
093                            nodeKey.add(new Integer(0));
094                            handler.putDatum(nodeKey, String.valueOf(encodingChars.getFieldSeparator()));
095                            encodingChars.setComponentSeparator(segment.charAt(4));
096                            encodingChars.setRepetitionSeparator(segment.charAt(5));
097                            encodingChars.setEscapeCharacter(segment.charAt(6));
098                            encodingChars.setSubcomponentSeparator(segment.charAt(7));
099                            nodeKey.set(0, new Integer(1));
100                            handler.putDatum(nodeKey, encodingChars.toString());
101    
102                            if(segment.charAt(8) == encodingChars.getFieldSeparator()) {    
103                                    ret = true; 
104                                    // now -- we recurse 
105                                    // through fields / field-repetitions / components / subcomponents.
106                                    nodeKey.clear();
107                                    nodeKey.add(new Integer(2));
108                                    parseSegmentGuts(handler, segment.substring(9), nodeKey);
109                            }
110                    }
111                    catch(IndexOutOfBoundsException e) {}
112                    catch(NullPointerException e) {}
113    
114                    return ret;
115            }
116    
117            /** pass in a whole segment (of type other than MSH), including message type
118            at the start, according to encodingChars, and we'll parse the contents and
119            put them in props. */
120            protected static void parseSegmentWhole(/*out*/ Properties props, 
121                    /*in/out*/ Map /*<String -> Integer>*/ segmentId2nextRepIdx, 
122                    /*in*/ Vector /*<DatumPath>*/ msgMask, /*in*/ EncodingCharacters encodingChars, 
123                    /*in*/ String segment)
124            {
125                    try {
126                            String segmentId = segment.substring(0, 3);
127    
128                            int currentSegmentRepIdx = 0;
129                            if(segmentId2nextRepIdx.containsKey(segmentId))
130                                    currentSegmentRepIdx = ((Integer)segmentId2nextRepIdx.get(segmentId)).intValue();
131                            else
132                                    currentSegmentRepIdx = 0;
133                            segmentId2nextRepIdx.put(segmentId, new Integer(currentSegmentRepIdx+1));
134    
135                            // will only bother to parse this segment if any of it's contents will 
136                            // be dumped to props.
137                            boolean parseThisSegment = false;
138                            DatumPath segmentIdAsDatumPath = (new DatumPath()).add(segmentId);
139                            for(Iterator maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
140                                    parseThisSegment = segmentIdAsDatumPath.startsWith((DatumPath)(maskIt.next()));
141                            for(Iterator maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
142                                    parseThisSegment = ((DatumPath)(maskIt.next())).startsWith(segmentIdAsDatumPath);
143    
144                            if(parseThisSegment && (segment.charAt(3) == encodingChars.getFieldSeparator())) {
145                                    ER7SegmentHandler handler = new ER7SegmentHandler();
146                                    handler.m_props = props;
147                                    handler.m_encodingChars = encodingChars;
148                                    handler.m_segmentId = segmentId;
149                                    if(msgMask != null)
150                                            handler.m_msgMask = msgMask;
151                                    else {
152                                            handler.m_msgMask = new Vector();
153                                            handler.m_msgMask.add(new DatumPath()); // everything will pass this
154                                                    // (every DatumPath startsWith the zero-length DatumPath)
155                                    }
156                                    handler.m_segmentRepIdx = currentSegmentRepIdx;
157    
158                                    Vector nodeKey = new Vector();
159                                    nodeKey.add(new Integer(0));
160                                    parseSegmentGuts(handler, segment.substring(4), nodeKey);
161                            }
162                    }
163                    catch(NullPointerException e) {}
164                    catch(IndexOutOfBoundsException e) {}
165            }
166    
167            static protected interface Handler
168            {
169                    public int specDepth();
170                    public char delim(int level);
171    
172                    public void putDatum(Vector/*<Integer>*/ nodeKey, String value);
173            }
174    
175            static protected class ER7SegmentHandler implements Handler
176            {
177                    Properties m_props;
178    
179                    EncodingCharacters m_encodingChars;
180    
181                    String m_segmentId;
182                    int m_segmentRepIdx;
183    
184                    Vector /*<DatumPath>*/ m_msgMask;
185    
186                    public int specDepth() {return 4;}
187    
188                    public char delim(int level)
189                    {
190                            if(level == 0)
191                                    return m_encodingChars.getFieldSeparator();
192                            else if(level == 1)
193                                    return m_encodingChars.getRepetitionSeparator();
194                            else if(level == 2)
195                                    return m_encodingChars.getComponentSeparator();
196                            else if(level == 3)
197                                    return m_encodingChars.getSubcomponentSeparator();
198                            else
199                                    throw new java.lang.Error();
200                    }
201    
202                    public void putDatum(Vector /*<Integer>*/ valNodeKey, String value)
203                    {
204                            // make a DatumPath from valNodeKey and info in this: 
205                            DatumPath valDatumPath = new DatumPath();
206                            valDatumPath.add(m_segmentId).add(m_segmentRepIdx);
207                            for(int i=0; i<valNodeKey.size(); ++i) {
208                                    // valNodeKey: everything counts from 0 -- not so with DatumPath ... sigh. 
209                                    int itval = ((Integer)valNodeKey.get(i)).intValue();
210                                    valDatumPath.add(new Integer(i == 1 ? itval : itval+1));
211                            }
212    
213                            // see if valDatumPath passes m_msgMask: 
214                            boolean valDatumPathPassesMask = false;
215                            for(Iterator maskIt = m_msgMask.iterator(); 
216                                    !valDatumPathPassesMask && maskIt.hasNext(); )
217                            {
218                                    valDatumPathPassesMask = valDatumPath.startsWith((DatumPath)(maskIt.next()));
219                            }
220    
221                            if(valDatumPathPassesMask)
222                                    m_props.setProperty(valDatumPath.toString(), value);
223                    }
224            }
225    
226            /** recursively tokenize "guts" (a segment, or part of one) into tokens, 
227            according to separators (aka delimiters) which are different at each level
228            of recursion, and to a recursive depth which is discovered through "handler"
229            via handler.delim(int) and handler.specDepth()  As tokens are found, they
230            are reported to handler via handler.putDatum(), which presumably stashes them
231            away somewhere.  We tell the handler about the location in the message via
232            putDatum()'s key argument, which is a List of Integers representing the 
233            position in the parse tree (size() == depth of recursion).
234    
235            TODO: say more.
236            */
237            protected static void parseSegmentGuts(/*in/out*/ Handler handler,  
238                    /*in*/ String guts, /*in*/Vector/*<Integer>*/ nodeKey)
239            {
240                    char thisDepthsDelim = handler.delim(nodeKey.size()-1);
241                    //nodeKey.add(new Integer(0)); // will change nodeKey back before function exits
242    
243                    StringTokenizer gutsTokenizer 
244                            = new StringTokenizer(guts, String.valueOf(thisDepthsDelim), true);
245                    while(gutsTokenizer.hasMoreTokens()) {
246                            String gutsToken = gutsTokenizer.nextToken();
247    
248                            if(gutsToken.charAt(0) == thisDepthsDelim) {
249                                    // gutsToken is all delims -- skipping over as many fields or
250                                    // components or whatevers as there are characters in the token: 
251                                    int oldvalue = ((Integer)nodeKey.get(nodeKey.size()-1)).intValue();
252                                    nodeKey.set(nodeKey.size()-1, new Integer(oldvalue + gutsToken.length()));
253                            }
254                            else {
255                                    if(nodeKey.size() < handler.specDepth()) {
256                                            nodeKey.add(new Integer(0));
257                                            parseSegmentGuts(handler, gutsToken, nodeKey);
258                                            nodeKey.setSize(nodeKey.size()-1);
259                                    }
260                                    else 
261                                            handler.putDatum(nodeKey, gutsToken);
262                            }
263                    }
264                    //nodeKey.setSize(nodeKey.size()-1); // undoing add done at top of this func
265            }
266    
267            public static void main(String args[])
268            {
269                    if(args.length >= 1) {
270                            //String message = "MSH|^~\\&||||foo|foo|foo";
271                            System.out.println(args[0]);
272    
273                            Properties props = new Properties();
274    
275                            Vector msgMask = new Vector();
276                            msgMask.add(new DatumPath());
277    
278                            System.err.println("ER7.parseMessage returned " + parseMessage(props, msgMask, args[0]));
279                            props.list(System.out);
280                    }
281            }
282            
283    }
284