001    /**
002     * The contents of this file are subject to the Mozilla Public License Version 1.1
003     * (the "License"); you may not use this file except in compliance with the License.
004     * You may obtain a copy of the License at http://www.mozilla.org/MPL/
005     * Software distributed under the License is distributed on an "AS IS" basis,
006     * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007     * specific language governing rights and limitations under the License.
008     *
009     * The Original Code is "MessageQuery.java".  Description:
010     * "Queries messages in an SQL-like style.  "
011     *
012     * The Initial Developer of the Original Code is University Health Network. Copyright (C)
013     * 2005.  All Rights Reserved.
014     *
015     * Contributor(s): ______________________________________.
016     *
017     * Alternatively, the contents of this file may be used under the terms of the
018     * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
019     * applicable instead of those above.  If you wish to allow use of your version of this
020     * file only under the terms of the GPL and not to allow others to use your version
021     * of this file under the MPL, indicate your decision by deleting  the provisions above
022     * and replace  them with the notice and other provisions required by the GPL License.
023     * If you do not delete the provisions above, a recipient may use your version of
024     * this file under either the MPL or the GPL.
025     *
026     */
027    package ca.uhn.hl7v2.util;
028    
029    import java.util.ArrayList;
030    import java.util.HashMap;
031    import java.util.Map;
032    import java.util.Properties;
033    import java.util.StringTokenizer;
034    import java.util.regex.Matcher;
035    import java.util.regex.Pattern;
036    
037    import ca.uhn.hl7v2.HL7Exception;
038    import ca.uhn.hl7v2.model.Message;
039    
040    /**
041     * Queries messages in an SQL-like style.  We get repeated row-like 
042     * structures by looping over repetitions of groups, segments, or fields. 
043     * 
044     * This is a very advanced class ... maybe too advanced even for you.  If you 
045     * find it confusing, please note that there are simpler ways to get data 
046     * from a message (like calling its getters or using Terser).   
047     * 
048     * LOOPING:    
049     * You specify the loop points as part of the query.  For example you could 
050     * specify loop point x like this: <code>x = /.MSH-18(*)</code>.  The * 
051     * is replaced by numbers 0, 1, 2, etc. as you loop through the results, 
052     * so this example would loop through repetitions of MSH-18.  If 
053     * there are multiple loop points, the loops are nested so that each possible 
054     * combination is returned.  Looping stops when none of the fields under a 
055     * loop point are valued.  The name of the loop point ('x' in the example 
056     * above) is arbitrary.        
057     * 
058     * SELECTING FIELDS: 
059     * The syntax is similar to SQL, except that Terser paths are used in place
060     * of table.field.  You can use the "as" keyword to give a field a name, like 
061     * this: <code>select /.MSH-7 as msg_date</code>.  If your field is under 
062     * a loop point, replace the path up to the loop point with a loop point 
063     * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code>
064     * 
065     * SELECTING ROWS:
066     * A "row" is a combination of all selected fields at one iteration.  You 
067     * can filter which rows are returned using a where clause similar to that
068     * in SQL.  Use exact values or regular expressions, for example: 
069     * <code>where {1} like '.*blood.*'</code> or <code>where {1}/PID-3-1 = '111'</code>
070     * Multiple filters can be separated with commas (which mean 'and').  Future
071     * versions may support 'or', negation, brackets, etc., but this version doesn't.  
072     * 
073     * FULL EXAMPLE: 
074     * select {pat-id}-1 as id loop pat-id = ./PID-3(*) where {pat-id}-2 = 'mrn'
075     * 
076     * SUBTLETIES OF LOOPING: 
077     * A loop point can be under another loop point.  For example consider the message:   
078     * 
079     * MSH|etc.|etc.
080     * Z01|one~two|a
081     * Z01|three~four|b
082     * 
083     * The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return: 
084     * a one
085     * a two 
086     * b three
087     * b four
088     * 
089     * While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would return:
090     * a one
091     * a two 
092     * b one
093     * b two
094     * 
095     * In the first case, one loop point refers to another.  In the second case the loops 
096     * are treated as independent, just as if they referred to different branches of the 
097     * message.  
098     * 
099     * TODO: could support distinct easily by keeping record of rows and comparing each 
100     * one to previous rows 
101     * 
102     * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
103     * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $
104     */
105    public class MessageQuery {
106    
107        /**
108         * @param theMessage an HL7 message from which data are to be queried 
109         * @param theQuery the query (see class docs for syntax)
110         * @return data from the message that are selected by the query 
111         */
112        public static Result query(Message theMessage, String theQuery) {
113            Properties clauses = getClauses(theQuery);
114            
115            //parse select clause
116            StringTokenizer select = new StringTokenizer(clauses.getProperty("select"), ", ", false);
117            ArrayList fieldPaths = new ArrayList(10);
118            HashMap names = new HashMap(10);
119            while (select.hasMoreTokens()) {
120                String token = select.nextToken();
121                if (token.equals("as")) {
122                    if (!select.hasMoreTokens()) {
123                        throw new IllegalArgumentException("Keyword 'as' must be followed by a field label");
124                    }
125                    names.put(select.nextToken(), new Integer(fieldPaths.size()-1));
126                } else {
127                    fieldPaths.add(token);  
128                }
129            }
130            
131            //parse loop clause 
132            StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop", ""), ",", false);
133            ArrayList loopPoints = new ArrayList(10);
134            HashMap loopPointNames = new HashMap(10);
135            while (loop.hasMoreTokens()) {
136                String pointDecl = loop.nextToken();
137                StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
138                String name = tok.nextToken().trim();
139                String path = tok.nextToken().trim();
140                loopPoints.add(path);
141                loopPointNames.put(name, new Integer(loopPoints.size()-1));
142            }
143                    
144            //parse where clause 
145            //TODO: this will do for now but it should really be evaluated like an expression 
146            //rather than a list  
147            StringTokenizer where = new StringTokenizer(clauses.getProperty("where", ""), ",", false);
148            ArrayList filters = new ArrayList();
149            while (where.hasMoreTokens()) {
150                filters.add(where.nextToken());
151            }
152            String[] filterPaths = new String[filters.size()];
153            String[] filterPatterns = new String[filters.size()];
154            boolean[] exactFlags = new boolean[filters.size()];
155            
156            for (int i = 0; i < filters.size(); i++) {
157                exactFlags[i] = true;
158                String filter = (String) filters.get(i);
159                String[] parts = splitFromEnd(filter, "=");
160                if (parts[1] != null) {                
161                    parts[1] = parts[1].substring(1);
162                } else {
163                    exactFlags[i] = false;
164                    parts = splitFromEnd(filter, "like");
165                    parts[1] = parts[1].substring(4);
166                }
167                filterPaths[i] = parts[0].trim();
168                parts[1] = parts[1].trim();
169                filterPatterns[i] = parts[1].substring(1, parts[1].length()-1);
170            }        
171            
172            return new ResultImpl(theMessage, 
173                    (String[]) loopPoints.toArray(new String[0]), 
174                    loopPointNames, 
175                    (String[]) fieldPaths.toArray(new String[0]), 
176                    names,
177                    filterPaths, 
178                    filterPatterns, 
179                    exactFlags);
180        }
181        
182        
183        private static Properties getClauses(String theQuery) {
184            Properties clauses = new Properties();
185            
186            String[] split = splitFromEnd(theQuery, "where ");
187            setClause(clauses, "where", split[1]);
188                    
189            split = splitFromEnd(split[0], "loop ");
190            setClause(clauses, "loop", split[1]);
191            setClause(clauses, "select", split[0]);
192            
193            if (clauses.getProperty("where", "").indexOf("loop ") >= 0) {
194                throw new IllegalArgumentException("The loop clause must precede the where clause");            
195            }
196            if (clauses.getProperty("select") == null) {
197                throw new IllegalArgumentException("The query must begin with a select clause");                        
198            }
199            return clauses;
200        }
201        
202        private static void setClause(Properties theClauses, String theName, String theClause) {
203            if (theClause != null) {
204                theClauses.setProperty(theName, theClause.substring(theName.length()).trim());
205            }
206        }
207        
208        private static String[] splitFromEnd(String theString, String theMarker) {
209            String[] result = new String[2];
210            int begin = theString.indexOf(theMarker);
211            if (begin >= 0) {
212                result[0] = theString.substring(0, begin);
213                result[1] = theString.substring(begin);
214            } else {
215                result[0] = theString;
216            }        
217            return result;
218        }
219        
220        /**
221         * A result set for a message query.    
222         * 
223         * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
224         * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $
225         */
226        public static interface Result {
227            
228            /**
229             * @param theFieldNumber numbered from zero in the order they are specified in the 
230             *      query
231             * @return the corresponding value in the current row 
232             */
233            public String get(int theFieldNumber);
234            
235            /**
236             * @param theFieldName a field name as specified in the query with the keyword "as" 
237             * @return the corresponding value in the current row 
238             */
239            public String get(String theFieldName);
240            
241            /**
242             * @return a list of named fields as defined with 'as' in the query
243             */
244            public String[] getNamedFields();
245            
246            /**
247             * Advances to the next "row" of data if one is available. 
248             * @return true if another row is available  
249             * @throws HL7Exception
250             */
251            public boolean next() throws HL7Exception;
252    
253        }
254        
255        private static class ResultImpl implements Result {
256    
257            private Terser myTerser;
258            private String[] myValues;
259            private String[] myLoopPoints;
260            private Map myLoopPointNames;
261            private String[] myFieldPaths;
262            private Map myFieldNames;
263            private int[] myIndices;
264            private int[] myNumEmpty; //number of empty sub-loops since last non-empty one
265            private int[] myMaxNumEmpty;
266            private boolean myNonLoopingQuery = false;
267            private String[] myWherePaths;
268            private String[] myWhereValues;
269            private String[] myWherePatterns;
270            private boolean[] myExactMatchFlags;
271            
272            public ResultImpl(Message theMessage, 
273                    String[] theLoopPoints,  
274                    Map theLoopPointNames, 
275                    String[] theFieldPaths, 
276                    Map theFieldNames, 
277                    String[] theWherePaths, 
278                    String[] theWherePatterns, 
279                    boolean[] theExactMatchFlags) {
280                
281                myTerser = new Terser(theMessage);
282                myLoopPoints = theLoopPoints;
283                myIndices = new int[theLoopPoints.length];
284                myNumEmpty = new int[theLoopPoints.length];
285                myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
286                myLoopPointNames = theLoopPointNames;
287                myFieldPaths = theFieldPaths;
288                myValues = new String[theFieldPaths.length];
289                myFieldNames = theFieldNames;
290                myWherePaths = theWherePaths;
291                myWherePatterns = theWherePatterns;
292                myExactMatchFlags = theExactMatchFlags;
293                            
294                if (theLoopPoints.length == 0) {
295                    myNonLoopingQuery = true; //if no loops, give ourselves 1 iteration
296                } else {
297                    myIndices[myIndices.length - 1] = -1; //start before 1st iteration
298                }
299                
300            }
301            
302            //extracts max number of empty iterations for each loop point (this is communicated 
303            //as an optional integer after the *, e.g. blah(*3) ... default is 0).  
304            private int[] getMaxNumEmpty(String[] theLoopPoints) {
305                int[] retVal = new int[theLoopPoints.length];
306                for (int i = 0; i < theLoopPoints.length; i++) {
307                    retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
308                }
309                return retVal;
310            }
311            
312            private int getMaxNumEmpty(String theLoopPoint) {
313                int retVal = 0; //default
314                
315                Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint);
316                if (m.find()) {
317                    String num = m.group(1);
318                    retVal = Integer.parseInt(num);
319                }
320                
321                return retVal;
322            }
323            
324            //returns true if some field under the given loop point has a value at the present 
325            //iteration
326            private boolean currentRowValued(int theLoopPoint) {
327                for (int i = 0; i < myFieldPaths.length; i++) {
328                    if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
329                        String value = (String) myValues[i];
330                        if (value != null && value.length() > 0) {
331                            return true;
332                        }                    
333                    }
334                }
335                return false;
336            }
337            
338            //returns true if the current row matches the where clause filters
339            private boolean currentRowMatchesFilter() {
340                for (int i = 0; i < myWhereValues.length; i++) {
341                    if (myExactMatchFlags[i]) {
342                        if (!myWherePatterns[i].equals(myWhereValues[i])) {
343                            return false;
344                        }                    
345                    } else {
346                        if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) {
347                            return false;
348                        }
349                    }
350                }
351                return true;
352            }
353            
354            //true if the given path references the given loop point (directly 
355            //or indirectly)
356            private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
357                String path = theFieldPath;
358                int lp;
359                while ((lp = getLoopPointReference(path)) >= 0) {
360                    if (lp == theLoopPoint) {
361                        return true;
362                    } else {
363                        path = myLoopPoints[lp];
364                    }                
365                }
366                return false;
367            }
368            
369            //expands a set of paths to their current loop point iterations, and gets 
370            //current values from our message
371            private String[] getCurrentValues(String[] thePaths) throws HL7Exception {
372                String[] paths = composePaths(thePaths);
373                String[] values = new String[paths.length]; 
374                for (int i = 0; i < paths.length; i++) {
375                    values[i] = myTerser.get(paths[i]);
376                    if (values[i] == null) {
377                        values[i] = "";
378                    }
379                }
380                return values;
381            }               
382            
383            //creates full Terser paths from current location, loop points, and given paths 
384            //with loop point references 
385            private String[] composePaths(String[] thePaths) {
386                String[] currentLoopPoints = composeLoopPoints();
387                String[] result = new String[thePaths.length];
388                for (int i = 0; i < thePaths.length; i++) {
389                    result[i] = thePaths[i];
390                    int ref = getLoopPointReference(thePaths[i]);
391                    if (ref >= 0) {
392                        result[i] = expandLoopPointReference(result[i], currentLoopPoints[ref]);
393                    }                
394                }
395                return result;
396            }
397            
398            //parameterizes loop points with present location (i.e. replaces * with current
399            //indices)
400            private String[] composeLoopPoints() {
401                String[] result = new String[myLoopPoints.length];
402                for (int i = 0; i < myLoopPoints.length; i++) {
403                    result[i] = myLoopPoints[i].replaceAll("\\*\\d*", String.valueOf(myIndices[i]));
404    
405                    int ref = getLoopPointReference(myLoopPoints[i]);
406                    if (ref >= i) {
407                        throw new IllegalStateException("Loop point must be defined after the " +
408                                "one it references: " + myLoopPoints[i]);
409                    } else if (ref >= 0) {
410                        result[i] = expandLoopPointReference(result[i], result[ref]);
411                    }
412                }
413                return result;
414            }
415            
416            //extracts LP# of label between first '{' and first '}', or -1 if there isn't one
417            private int getLoopPointReference(String thePath) {
418                StringTokenizer tok = new StringTokenizer(thePath, "{}", false);
419                if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) {
420                    String ref = tok.nextToken();
421                    return ((Integer) myLoopPointNames.get(ref)).intValue();
422                } else {
423                    return -1;
424                }
425            }
426            
427            private String expandLoopPointReference(String thePath, String theLoopPoint) {
428                return thePath.replaceAll("\\{.*\\}", theLoopPoint);
429            }
430                    
431            /**
432             * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
433             */
434            public String get(int theFieldNumber) {
435                if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
436                    throw new IllegalArgumentException("Field number must be between 0 and " 
437                            + (myValues.length - 1));
438                }
439                return (String) myValues[theFieldNumber];
440            }
441    
442            /**
443             * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
444             */
445            public String get(String theFieldName) {
446                Integer fieldNum = (Integer) myFieldNames.get(theFieldName);
447                if (fieldNum == null) {
448                    throw new IllegalArgumentException("Field name not recognized: " + theFieldName);
449                }
450                return get(fieldNum.intValue());
451            }
452    
453            /** 
454             * @throws HL7Exception
455             * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
456             */
457            public boolean next() throws HL7Exception {
458                if (myNonLoopingQuery) {
459                    myNonLoopingQuery = false;
460                    myValues = getCurrentValues(myFieldPaths);
461                    myWhereValues = getCurrentValues(myWherePaths);
462                    return currentRowMatchesFilter();
463                }
464                
465                boolean hasNext = false;
466                findNext : for (int i = myIndices.length - 1; i >= 0; i--) {    
467                    boolean gotMatch = false;
468                    while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
469                        myIndices[i]++;
470                        myValues = getCurrentValues(myFieldPaths);
471                        myWhereValues = getCurrentValues(myWherePaths);
472                        
473                        if (!currentRowValued(i)) {
474                            myNumEmpty[i]++;
475                        } else {
476                            myNumEmpty[i] = 0;
477                        }
478                        if (currentRowMatchesFilter()) {
479                            gotMatch = true;
480                        }
481                    }
482                    
483                    hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];// && currentRowMatchesFilter();
484                    if (hasNext) {
485                        break findNext;
486                    }                                 
487                    
488                    myIndices[i] = 0;
489                    myNumEmpty[i] = 0;
490                    
491                    //TODO: if we aren't allowing empties in this loop, and have no value, we want to 
492                    //return the null in the super-loop.  However, we don't know which loop point, if 
493                    //any, is the super-loop.  If it was the next one we could do this ... 
494                    //if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0 && myIndices[i-1] == 0) {
495                    //    myIndices[i-1] = -1;
496                    //} ... but it may not be, so we'll ignore this problem for now.  
497                }
498                return hasNext;
499            }
500    
501            /**
502             * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
503             */
504            public String[] getNamedFields() {
505                return (String[]) myFieldNames.keySet().toArray(new String[0]);
506            }
507            
508        }
509    
510    }