/**
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
 * specific language governing rights and limitations under the License.
 *
 * The Original Code is "MessageQuery.java". Description:
 * "Queries messages in an SQL-like style. "
 *
 * The Initial Developer of the Original Code is University Health Network. Copyright (C)
 * 2005. All Rights Reserved.
 *
 * Contributor(s): ______________________________________.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * GNU General Public License (the "GPL"), in which case the provisions of the GPL are
 * applicable instead of those above. If you wish to allow use of your version of this
 * file only under the terms of the GPL and not to allow others to use your version
 * of this file under the MPL, indicate your decision by deleting the provisions above
 * and replace them with the notice and other provisions required by the GPL License.
 * If you do not delete the provisions above, a recipient may use your version of
 * this file under either the MPL or the GPL.
 *
 */
package ca.uhn.hl7v2.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import ca.uhn.hl7v2.HL7Exception;
import ca.uhn.hl7v2.model.Message;

/**
 * Queries messages in an SQL-like style. We get repeated row-like
 * structures by looping over repetitions of groups, segments, or fields.
 *
 * <p>This is a very advanced class ... maybe too advanced even for you. If you
 * find it confusing, please note that there are simpler ways to get data
 * from a message (like calling its getters or using Terser).</p>
 *
 * <p>LOOPING:
 * You specify the loop points as part of the query. For example you could
 * specify loop point x like this: <code>x = /.MSH-18(*)</code>. The *
 * is replaced by numbers 0, 1, 2, etc. as you loop through the results,
 * so this example would loop through repetitions of MSH-18. If
 * there are multiple loop points, the loops are nested so that each possible
 * combination is returned. Looping stops when none of the fields under a
 * loop point are valued. The name of the loop point ('x' in the example
 * above) is arbitrary. An optional integer directly after the * (for example
 * <code>x = /.MSH-18(*3)</code>) gives the number of consecutive iterations
 * without any valued field that are tolerated before that loop ends; the
 * default is 0.</p>
 *
 * <p>SELECTING FIELDS:
 * The syntax is similar to SQL, except that Terser paths are used in place
 * of table.field. You can use the "as" keyword to give a field a name, like
 * this: <code>select /.MSH-7 as msg_date</code>. If your field is under
 * a loop point, replace the path up to the loop point with a loop point
 * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code></p>
 *
 * <p>SELECTING ROWS:
 * A "row" is a combination of all selected fields at one iteration. You
 * can filter which rows are returned using a where clause similar to that
 * in SQL. Use exact values or regular expressions, for example:
 * <code>where {1} like '.*blood.*'</code> or <code>where {1}/PID-3-1 = '111'</code>
 * Multiple filters can be separated with commas (which mean 'and'). Future
 * versions may support 'or', negation, brackets, etc., but this version doesn't.</p>
 *
 * <p>FULL EXAMPLE:</p>
 * <pre>
 * select {pat-id}-1 as id loop pat-id = ./PID-3(*) where {pat-id}-2 = 'mrn'
 * </pre>
 *
 * <p>SUBTLETIES OF LOOPING:
 * A loop point can be under another loop point. For example consider the message:</p>
 * <pre>
 * MSH|etc.|etc.
 * Z01|one~two|a
 * Z01|three~four|b
 * </pre>
 *
 * <p>The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return:</p>
 * <pre>
 * a one
 * a two
 * b three
 * b four
 * </pre>
 *
 * <p>While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would return:</p>
 * <pre>
 * a one
 * a two
 * b one
 * b two
 * </pre>
 *
 * <p>In the first case, one loop point refers to another. In the second case the loops
 * are treated as independent, just as if they referred to different branches of the
 * message.</p>
 *
 * <p>TODO: could support distinct easily by keeping record of rows and comparing each
 * one to previous rows</p>
 *
 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
 * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $
 */
public class MessageQuery {

    /**
     * Parses the given query and returns a result set positioned before the first row.
     * No data are read from the message until {@link Result#next()} is called.
     *
     * @param theMessage an HL7 message from which data are to be queried
     * @param theQuery the query (see class docs for syntax)
     * @return data from the message that are selected by the query
     * @throws IllegalArgumentException if the query is null or malformed (no select
     *      clause, 'as' without a field path or label, a loop point declared without
     *      '=', or a filter without '=' / 'like' or without a quoted value)
     */
    public static Result query(Message theMessage, String theQuery) {
        Properties clauses = getClauses(theQuery);

        //parse select clause: field paths separated by commas/spaces, each optionally
        //followed by "as <label>"
        StringTokenizer select = new StringTokenizer(clauses.getProperty("select"), ", ", false);
        List fieldPaths = new ArrayList(10);
        Map names = new HashMap(10);
        while (select.hasMoreTokens()) {
            String token = select.nextToken();
            if (token.equals("as")) {
                if (!select.hasMoreTokens()) {
                    throw new IllegalArgumentException("Keyword 'as' must be followed by a field label");
                }
                if (fieldPaths.isEmpty()) {
                    //otherwise the label would be mapped to field index -1
                    throw new IllegalArgumentException("Keyword 'as' must be preceded by a field path");
                }
                //the label names the most recently parsed field
                names.put(select.nextToken(), new Integer(fieldPaths.size() - 1));
            } else {
                fieldPaths.add(token);
            }
        }

        //parse loop clause: comma-separated "name = path" declarations
        StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop", ""), ",", false);
        List loopPoints = new ArrayList(10);
        Map loopPointNames = new HashMap(10);
        while (loop.hasMoreTokens()) {
            String pointDecl = loop.nextToken();
            StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
            if (tok.countTokens() < 2) {
                throw new IllegalArgumentException(
                        "Loop point must be declared as 'name = path': " + pointDecl.trim());
            }
            String name = tok.nextToken().trim();
            String path = tok.nextToken().trim();
            loopPoints.add(path);
            loopPointNames.put(name, new Integer(loopPoints.size() - 1));
        }

        //parse where clause
        //TODO: this will do for now but it should really be evaluated like an expression
        //rather than a list
        StringTokenizer where = new StringTokenizer(clauses.getProperty("where", ""), ",", false);
        List filters = new ArrayList();
        while (where.hasMoreTokens()) {
            filters.add(where.nextToken());
        }
        String[] filterPaths = new String[filters.size()];
        String[] filterPatterns = new String[filters.size()];
        boolean[] exactFlags = new boolean[filters.size()];

        for (int i = 0; i < filters.size(); i++) {
            String filter = (String) filters.get(i);

            //"=" is tried first (exact match); "like" (regular expression) is the fallback
            String[] parts = splitFromEnd(filter, "=");
            int operatorLength = 1;
            exactFlags[i] = true;
            if (parts[1] == null) {
                parts = splitFromEnd(filter, "like");
                operatorLength = 4;
                exactFlags[i] = false;
                if (parts[1] == null) {
                    throw new IllegalArgumentException(
                            "Filter must use '=' or 'like': " + filter.trim());
                }
            }

            //parts[1] still starts with the operator; what follows is the quoted value
            String value = parts[1].substring(operatorLength).trim();
            if (value.length() < 2) {
                throw new IllegalArgumentException(
                        "Filter value must be quoted: " + filter.trim());
            }
            filterPaths[i] = parts[0].trim();
            //strip the first and last character (the surrounding quotes)
            filterPatterns[i] = value.substring(1, value.length() - 1);
        }

        return new ResultImpl(theMessage,
                (String[]) loopPoints.toArray(new String[0]),
                loopPointNames,
                (String[]) fieldPaths.toArray(new String[0]),
                names,
                filterPaths,
                filterPatterns,
                exactFlags);
    }

    /**
     * Splits a query into its "select", "loop" and "where" clauses. The returned
     * properties are keyed by clause name; each value is the clause text without its
     * keyword. "loop" and "where" are absent if the query does not contain them.
     *
     * @param theQuery the complete query
     * @return the clauses of the query, keyed by clause name
     * @throws IllegalArgumentException if the query is null, does not begin with a
     *      select clause, or has its loop clause after its where clause
     */
    private static Properties getClauses(String theQuery) {
        if (theQuery == null) {
            throw new IllegalArgumentException("The query must not be null");
        }

        //setClause() blindly strips keyword-length characters from the front of each
        //clause, so the select keyword has to be verified here, after leading
        //whitespace is removed
        String query = theQuery.trim();
        if (!query.regionMatches(true, 0, "select", 0, "select".length())) {
            throw new IllegalArgumentException("The query must begin with a select clause");
        }

        Properties clauses = new Properties();

        String[] split = splitFromEnd(query, "where ");
        setClause(clauses, "where", split[1]);

        split = splitFromEnd(split[0], "loop ");
        setClause(clauses, "loop", split[1]);
        setClause(clauses, "select", split[0]);

        if (clauses.getProperty("where", "").indexOf("loop ") >= 0) {
            throw new IllegalArgumentException("The loop clause must precede the where clause");
        }
        return clauses;
    }

    /**
     * Stores a clause under the given name after removing its leading keyword and
     * surrounding whitespace. Does nothing if the clause is null.
     *
     * @param theClauses the properties to which the clause is added
     * @param theName the clause keyword; as many characters as the keyword is long are
     *      dropped from the start of the clause text
     * @param theClause the clause text beginning with its keyword, or null
     */
    private static void setClause(Properties theClauses, String theName, String theClause) {
        if (theClause != null) {
            theClauses.setProperty(theName, theClause.substring(theName.length()).trim());
        }
    }

    /**
     * Splits a string in two at the first occurrence of a marker (despite the method
     * name, the search runs from the start of the string).
     *
     * @param theString the string to split
     * @param theMarker the text at which to split
     * @return a two-element array: [0] is the text before the marker and [1] is the
     *      text from the marker onward (marker included); if the marker is not found,
     *      [0] is the whole string and [1] is null
     */
    private static String[] splitFromEnd(String theString, String theMarker) {
        String[] result = new String[2];
        int begin = theString.indexOf(theMarker);
        if (begin >= 0) {
            result[0] = theString.substring(0, begin);
            result[1] = theString.substring(begin);
        } else {
            result[0] = theString;
        }
        return result;
    }

    /**
     * A result set for a message query.
     *
     * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
     * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $
     */
    public static interface Result {

        /**
         * @param theFieldNumber numbered from zero in the order they are specified in the
         *      query
         * @return the corresponding value in the current row
         */
        public String get(int theFieldNumber);

        /**
         * @param theFieldName a field name as specified in the query with the keyword "as"
         * @return the corresponding value in the current row
         */
        public String get(String theFieldName);

        /**
         * @return a list of named fields as defined with 'as' in the query
         */
        public String[] getNamedFields();

        /**
         * Advances to the next "row" of data if one is available.
         * @return true if another row is available
         * @throws HL7Exception
         */
        public boolean next() throws HL7Exception;

    }

    /**
     * Result implementation that reads values from the message through a Terser each
     * time the row is advanced.
     */
    private static class ResultImpl implements Result {

        /** Matches the optional "max empty iterations" number after a '*', e.g. blah(*3). */
        private static final Pattern MAX_EMPTY_PATTERN = Pattern.compile("\\*(\\d+)");

        private final Terser myTerser;
        private String[] myValues;
        private final String[] myLoopPoints;
        private final Map myLoopPointNames;
        private final String[] myFieldPaths;
        private final Map myFieldNames;
        private final int[] myIndices;
        private final int[] myNumEmpty; //number of empty sub-loops since last non-empty one
        private final int[] myMaxNumEmpty;
        private boolean myNonLoopingQuery = false;
        private final String[] myWherePaths;
        private String[] myWhereValues;
        private final String[] myWherePatterns;
        private final Pattern[] myCompiledWherePatterns; //filled lazily, 'like' filters only
        private final boolean[] myExactMatchFlags;

        /**
         * @param theMessage the message to query
         * @param theLoopPoints loop point paths, in declaration order
         * @param theLoopPointNames maps loop point name to its Integer index in theLoopPoints
         * @param theFieldPaths selected field paths, in select order
         * @param theFieldNames maps field label ('as') to its Integer index in theFieldPaths
         * @param theWherePaths the path of each where-clause filter
         * @param theWherePatterns the value or regular expression of each filter
         * @param theExactMatchFlags true for each filter that uses '=', false for 'like'
         */
        public ResultImpl(Message theMessage,
                String[] theLoopPoints,
                Map theLoopPointNames,
                String[] theFieldPaths,
                Map theFieldNames,
                String[] theWherePaths,
                String[] theWherePatterns,
                boolean[] theExactMatchFlags) {

            myTerser = new Terser(theMessage);
            myLoopPoints = theLoopPoints;
            myIndices = new int[theLoopPoints.length];
            myNumEmpty = new int[theLoopPoints.length];
            myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
            myLoopPointNames = theLoopPointNames;
            myFieldPaths = theFieldPaths;
            myValues = new String[theFieldPaths.length];
            myFieldNames = theFieldNames;
            myWherePaths = theWherePaths;
            myWherePatterns = theWherePatterns;
            myCompiledWherePatterns = new Pattern[theWherePatterns.length];
            myExactMatchFlags = theExactMatchFlags;

            if (theLoopPoints.length == 0) {
                myNonLoopingQuery = true; //if no loops, give ourselves 1 iteration
            } else {
                myIndices[myIndices.length - 1] = -1; //start before 1st iteration
            }

        }

        /**
         * Extracts max number of empty iterations for each loop point (this is
         * communicated as an optional integer after the *, e.g. blah(*3) ... default
         * is 0).
         */
        private int[] getMaxNumEmpty(String[] theLoopPoints) {
            int[] retVal = new int[theLoopPoints.length];
            for (int i = 0; i < theLoopPoints.length; i++) {
                retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
            }
            return retVal;
        }

        /**
         * @return the integer following the first '*' that has one in the given loop
         *      point, or 0 if there is none
         */
        private int getMaxNumEmpty(String theLoopPoint) {
            int retVal = 0; //default

            Matcher m = MAX_EMPTY_PATTERN.matcher(theLoopPoint);
            if (m.find()) {
                retVal = Integer.parseInt(m.group(1));
            }

            return retVal;
        }

        /**
         * @return true if some field under the given loop point has a value at the
         *      present iteration
         */
        private boolean currentRowValued(int theLoopPoint) {
            for (int i = 0; i < myFieldPaths.length; i++) {
                if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
                    String value = myValues[i];
                    if (value != null && value.length() > 0) {
                        return true;
                    }
                }
            }
            return false;
        }

        /**
         * @return true if the current row matches all of the where clause filters
         */
        private boolean currentRowMatchesFilter() {
            for (int i = 0; i < myWhereValues.length; i++) {
                if (myExactMatchFlags[i]) {
                    if (!myWherePatterns[i].equals(myWhereValues[i])) {
                        return false;
                    }
                } else {
                    //compile each regular expression once, on first use, rather than
                    //on every row
                    if (myCompiledWherePatterns[i] == null) {
                        myCompiledWherePatterns[i] = Pattern.compile(myWherePatterns[i]);
                    }
                    if (!myCompiledWherePatterns[i].matcher(myWhereValues[i]).matches()) {
                        return false;
                    }
                }
            }
            return true;
        }

        /**
         * @return true if the given path references the given loop point (directly
         *      or indirectly, i.e. through a loop point that references it)
         */
        private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
            String path = theFieldPath;
            int lp;
            while ((lp = getLoopPointReference(path)) >= 0) {
                if (lp == theLoopPoint) {
                    return true;
                }
                //follow the chain: check what the referenced loop point references
                path = myLoopPoints[lp];
            }
            return false;
        }

        /**
         * Expands a set of paths to their current loop point iterations, and gets
         * current values from our message. Null values are returned as "".
         */
        private String[] getCurrentValues(String[] thePaths) throws HL7Exception {
            String[] paths = composePaths(thePaths);
            String[] values = new String[paths.length];
            for (int i = 0; i < paths.length; i++) {
                values[i] = myTerser.get(paths[i]);
                if (values[i] == null) {
                    values[i] = "";
                }
            }
            return values;
        }

        /**
         * Reads the selected fields and the where-clause fields at the current loop
         * indices.
         */
        private void loadCurrentRow() throws HL7Exception {
            myValues = getCurrentValues(myFieldPaths);
            myWhereValues = getCurrentValues(myWherePaths);
        }

        /**
         * Creates full Terser paths from current location, loop points, and given
         * paths with loop point references.
         */
        private String[] composePaths(String[] thePaths) {
            String[] currentLoopPoints = composeLoopPoints();
            String[] result = new String[thePaths.length];
            for (int i = 0; i < thePaths.length; i++) {
                result[i] = thePaths[i];
                int ref = getLoopPointReference(thePaths[i]);
                if (ref >= 0) {
                    result[i] = expandLoopPointReference(result[i], currentLoopPoints[ref]);
                }
            }
            return result;
        }

        /**
         * Parameterizes loop points with present location (i.e. replaces * with
         * current indices) and expands references to earlier loop points.
         *
         * @throws IllegalStateException if a loop point references itself or a loop
         *      point declared after it
         */
        private String[] composeLoopPoints() {
            String[] result = new String[myLoopPoints.length];
            for (int i = 0; i < myLoopPoints.length; i++) {
                //also swallows the optional max-empty number, e.g. (*3) becomes (0)
                result[i] = myLoopPoints[i].replaceAll("\\*\\d*", String.valueOf(myIndices[i]));

                int ref = getLoopPointReference(myLoopPoints[i]);
                if (ref >= i) {
                    throw new IllegalStateException("Loop point must be defined after the " +
                            "one it references: " + myLoopPoints[i]);
                } else if (ref >= 0) {
                    result[i] = expandLoopPointReference(result[i], result[ref]);
                }
            }
            return result;
        }

        /**
         * Extracts LP# of label between first '{' and the following '}', or -1 if the
         * path contains no '{'.
         *
         * @throws IllegalArgumentException if the reference has no closing '}' or names
         *      a loop point that was not declared in the loop clause
         */
        private int getLoopPointReference(String thePath) {
            int open = thePath.indexOf('{');
            if (open < 0) {
                return -1;
            }
            int close = thePath.indexOf('}', open + 1);
            if (close < 0) {
                throw new IllegalArgumentException(
                        "Unterminated loop point reference: " + thePath);
            }
            String name = thePath.substring(open + 1, close);
            Integer index = (Integer) myLoopPointNames.get(name);
            if (index == null) {
                throw new IllegalArgumentException(
                        "Undefined loop point '" + name + "' referenced in: " + thePath);
            }
            return index.intValue();
        }

        /**
         * Replaces the text from the first '{' through the last '}' of the path with
         * the given loop point. The loop point is inserted literally (characters such
         * as '$' have no special meaning). The path is returned unchanged if it has no
         * such span.
         */
        private String expandLoopPointReference(String thePath, String theLoopPoint) {
            int open = thePath.indexOf('{');
            int close = thePath.lastIndexOf('}');
            if (open < 0 || close < open) {
                return thePath;
            }
            return thePath.substring(0, open) + theLoopPoint + thePath.substring(close + 1);
        }

        /**
         * @throws IllegalArgumentException if the field number is out of range
         * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
         */
        public String get(int theFieldNumber) {
            if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
                throw new IllegalArgumentException("Field number must be between 0 and "
                        + (myValues.length - 1));
            }
            return myValues[theFieldNumber];
        }

        /**
         * @throws IllegalArgumentException if the name was not defined in the query
         * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
         */
        public String get(String theFieldName) {
            Integer fieldNum = (Integer) myFieldNames.get(theFieldName);
            if (fieldNum == null) {
                throw new IllegalArgumentException("Field name not recognized: " + theFieldName);
            }
            return get(fieldNum.intValue());
        }

        /**
         * @throws HL7Exception
         * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
         */
        public boolean next() throws HL7Exception {
            if (myNonLoopingQuery) {
                //a query without loop points has exactly one candidate row
                myNonLoopingQuery = false;
                loadCurrentRow();
                return currentRowMatchesFilter();
            }

            boolean hasNext = false;
            //advance the innermost (last declared) loop first; when a loop is
            //exhausted, reset it and carry over to the loop before it
            for (int i = myIndices.length - 1; i >= 0; i--) {
                boolean gotMatch = false;
                while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
                    myIndices[i]++;
                    loadCurrentRow();

                    if (!currentRowValued(i)) {
                        myNumEmpty[i]++;
                    } else {
                        myNumEmpty[i] = 0;
                    }
                    gotMatch = currentRowMatchesFilter();
                }

                hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];
                if (hasNext) {
                    break;
                }

                myIndices[i] = 0;
                myNumEmpty[i] = 0;

                //TODO: if we aren't allowing empties in this loop, and have no value, we want to
                //return the null in the super-loop. However, we don't know which loop point, if
                //any, is the super-loop. If it was the next one we could do this ...
                //if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0 && myIndices[i-1] == 0) {
                //  myIndices[i-1] = -1;
                //} ... but it may not be, so we'll ignore this problem for now.
            }
            return hasNext;
        }

        /**
         * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
         */
        public String[] getNamedFields() {
            return (String[]) myFieldNames.keySet().toArray(new String[0]);
        }

    }

}