package ca.uhn.hl7v2.preparser;

import java.lang.reflect.* ;
import java.util.* ;
import ca.uhn.hl7v2.parser.* ;

/*
The point of this class (all static members, not instantiatable) is to take a
traditionally-encoded HL7 message and add all its contents to a Properties
object, via the parseMessage() method.

The key-value pairs added to the Properties argument have keys that represent a
datum's location in the message.  (in the ZYX-1-2[0] style.  TODO: define
exactly.)  See Datum, particularly the toString() of that class.
Anyway, the Properties keys are those and the values are the tokens found.

Note: we accept useless field repetition separators at the end of a
field repetition sequence.  i.e. |855-4545~555-3792~~~| , and interpret this
as defining repetitions 0 and 1.  This might not be allowed.  (HL7 2.3.1
section 2.10 explicitly allows this behaviour for fields / components /
subcomponents, but the allowance is notably absent for repetitions.  TODO:
nail down.)  We allow it anyway.

Also, we accept things like |855-4545~~555-3792|, and interpret it as defining
repetitions 0 and 2.  The spec would seem to disallow this too, but there's no
harm.  :D
*/

public class ER7 {

    /** Not instantiable: every member is static. */
    private ER7() {}

    /** characters that delimit segments.  for use with StringTokenizer.
    We are forgiving: HL7 2.3.1 section 2.7 says that carriage return ('\r') is
    the only segment delimiter.  TODO: check other versions. */
    static final String segmentSeparators = "\r\n\f";

    /** Parses message and dumps contents to props, with keys in the
    ZYX[a]-b[c]-d-e style.

    The first segment is handed to parseMSHSegmentWhole(); if that fails, nothing
    else is parsed.  Every following segment is handed to parseSegmentWhole().

    @param props receives the key/value pairs.  If null, a throw-away Properties
    is used, so the caller only learns whether the message could be parsed.
    @param msgMask Vector of DatumPath; only data whose path starts with one of
    these is stored.  If null, everything is stored.
    @param message the ER7-encoded message text; may be null.
    @return true iff message was non-null, contained at least one segment, and
    that first segment was accepted by parseMSHSegmentWhole().
    */
    public static boolean parseMessage(/*out*/ Properties props,
        /*in*/ Vector /*<DatumPath>*/ msgMask, /*in*/ String message)
    {
        if(message == null)
            return false;

        if(props == null)
            props = new Properties();

        // Normalize once so that the MSH segment and all later segments are
        // filtered through the same (never null) mask.
        Vector /*<DatumPath>*/ mask = effectiveMask(msgMask);

        StringTokenizer messageTokenizer
            = new StringTokenizer(message, segmentSeparators);
        if(!messageTokenizer.hasMoreTokens())
            return false;

        String firstSegment = messageTokenizer.nextToken();
        // placeholder delimiters; parseMSHSegmentWhole() overwrites them with
        // the ones read from the message.
        EncodingCharacters encodingChars = new EncodingCharacters('0', "0000");
        if(!parseMSHSegmentWhole(props, mask, encodingChars, firstSegment))
            return false;

        TreeMap /*<String -> Integer>*/ segmentId2nextRepIdx = new TreeMap();
        // in case we find another MSH segment, heh.
        segmentId2nextRepIdx.put("MSH", new Integer(1));
        while(messageTokenizer.hasMoreTokens()) {
            parseSegmentWhole(props, segmentId2nextRepIdx,
                mask, encodingChars, messageTokenizer.nextToken());
        }
        return true;
    }

    /** Returns msgMask itself when it is non-null; otherwise a fresh mask that
    holds a single zero-length DatumPath, which lets everything pass
    (every DatumPath startsWith the zero-length DatumPath). */
    private static Vector /*<DatumPath>*/ effectiveMask(Vector /*<DatumPath>*/ msgMask)
    {
        if(msgMask != null)
            return msgMask;

        Vector passEverything = new Vector();
        passEverything.add(new DatumPath());
        return passEverything;
    }

    /** given segment, starting with "MSH", then encoding characters, etc...
    put MSH[0]-1[0]-1-1 (== MSH-1) and MSH[0]-2[0]-1-1 (== MSH-2) into props, if found,
    plus everything else found in 'segment'

    The field separator is read from character 3 of segment, and the component,
    repetition, escape and subcomponent characters from characters 4 to 7; all
    five are stored into encodingChars.  MSH-1 and MSH-2 are reported before
    character 8 is checked, so they may end up in props even when false is
    returned.

    @param props receives the key/value pairs.
    @param msgMask Vector of DatumPath used as filter; null means "everything".
    @param encodingChars filled in with the delimiters found in segment.
    @param segment the text of the whole first segment.
    @return true iff character 8 of segment is the field separator, i.e. the
    rest of the segment was handed to parseSegmentGuts().  A segment that is
    null or too short yields false.
    */
    protected static boolean parseMSHSegmentWhole(/*out*/ Properties props,
        /*in*/ Vector /*<DatumPath>*/ msgMask, /*in*/ EncodingCharacters encodingChars,
        /*in*/ String segment)
    {
        boolean ret = false;
        try {
            ER7SegmentHandler handler = new ER7SegmentHandler();
            handler.m_props = props;
            handler.m_encodingChars = encodingChars;
            handler.m_segmentId = "MSH";
            handler.m_segmentRepIdx = 0;
            handler.m_msgMask = effectiveMask(msgMask);

            // MSH-1: the field separator itself.
            encodingChars.setFieldSeparator(segment.charAt(3));
            Vector /*<Integer>*/ nodeKey = new Vector();
            nodeKey.add(new Integer(0));
            handler.putDatum(nodeKey, String.valueOf(encodingChars.getFieldSeparator()));

            // MSH-2: the other four encoding characters.
            encodingChars.setComponentSeparator(segment.charAt(4));
            encodingChars.setRepetitionSeparator(segment.charAt(5));
            encodingChars.setEscapeCharacter(segment.charAt(6));
            encodingChars.setSubcomponentSeparator(segment.charAt(7));
            nodeKey.set(0, new Integer(1));
            handler.putDatum(nodeKey, encodingChars.toString());

            if(segment.charAt(8) == encodingChars.getFieldSeparator()) {
                ret = true;
                // now -- we recurse
                // through fields / field-repetitions / components / subcomponents.
                // Field index 2 (0-based) is the first field after MSH-2.
                nodeKey.clear();
                nodeKey.add(new Integer(2));
                parseSegmentGuts(handler, segment.substring(9), nodeKey);
            }
        }
        catch(IndexOutOfBoundsException ignored) {
            // deliberate best effort: segment too short to be an MSH segment.
        }
        catch(NullPointerException ignored) {
            // deliberate best effort: a required argument was null.
        }

        return ret;
    }

    /** pass in a whole segment (of type other than MSH), including message type
    at the start, according to encodingChars, and we'll parse the contents and
    put them in props.

    The repetition counter kept for the segment's id in segmentId2nextRepIdx is
    advanced whether or not the segment is then parsed.  The segment is parsed
    only if its id is compatible with at least one entry of the mask and
    character 3 of segment is the field separator.  Segments that are null or
    too short are silently ignored.

    @param props receives the key/value pairs.
    @param segmentId2nextRepIdx maps a segment id (String) to the repetition
    index (Integer) the next segment with that id will get; updated here.
    @param msgMask Vector of DatumPath used as filter; null means "everything".
    @param encodingChars the delimiters to tokenize with.
    @param segment the text of the whole segment.
    */
    protected static void parseSegmentWhole(/*out*/ Properties props,
        /*in/out*/ Map /*<String -> Integer>*/ segmentId2nextRepIdx,
        /*in*/ Vector /*<DatumPath>*/ msgMask, /*in*/ EncodingCharacters encodingChars,
        /*in*/ String segment)
    {
        try {
            String segmentId = segment.substring(0, 3);

            Integer nextRepIdx = (Integer)segmentId2nextRepIdx.get(segmentId);
            int currentSegmentRepIdx = (nextRepIdx == null) ? 0 : nextRepIdx.intValue();
            segmentId2nextRepIdx.put(segmentId, new Integer(currentSegmentRepIdx+1));

            // Resolve a null mask BEFORE it is iterated; otherwise the
            // NullPointerException would be swallowed below and the segment
            // silently dropped.
            Vector /*<DatumPath>*/ mask = effectiveMask(msgMask);

            // will only bother to parse this segment if any of its contents will
            // be dumped to props: either a mask entry is a prefix of this
            // segment's path, or this segment's path is a prefix of a mask entry.
            DatumPath segmentIdAsDatumPath = (new DatumPath()).add(segmentId);
            boolean parseThisSegment = false;
            for(Iterator maskIt = mask.iterator(); !parseThisSegment && maskIt.hasNext(); ) {
                DatumPath maskPath = (DatumPath)(maskIt.next());
                parseThisSegment = segmentIdAsDatumPath.startsWith(maskPath)
                    || maskPath.startsWith(segmentIdAsDatumPath);
            }

            if(parseThisSegment && (segment.charAt(3) == encodingChars.getFieldSeparator())) {
                ER7SegmentHandler handler = new ER7SegmentHandler();
                handler.m_props = props;
                handler.m_encodingChars = encodingChars;
                handler.m_segmentId = segmentId;
                handler.m_msgMask = mask;
                handler.m_segmentRepIdx = currentSegmentRepIdx;

                Vector /*<Integer>*/ nodeKey = new Vector();
                nodeKey.add(new Integer(0));
                parseSegmentGuts(handler, segment.substring(4), nodeKey);
            }
        }
        catch(NullPointerException ignored) {
            // deliberate best effort: a required argument was null.
        }
        catch(IndexOutOfBoundsException ignored) {
            // deliberate best effort: segment too short to carry an id and a
            // field separator.
        }
    }

    /** What parseSegmentGuts() needs to know about the structure being
    tokenized, and where it reports the tokens it finds. */
    static protected interface Handler
    {
        /** @return the number of nesting levels to tokenize down to. */
        public int specDepth();

        /** @return the delimiter to split on at the given (0-based) level. */
        public char delim(int level);

        /** Reports one token.
        @param nodeKey Vector of Integer: the 0-based position of the token at
        each nesting level.
        @param value the token text. */
        public void putDatum(Vector/*<Integer>*/ nodeKey, String value);
    }

    /** Handler for one ER7 segment: four levels (field, field repetition,
    component, subcomponent), delimiters taken from m_encodingChars, tokens
    stored into m_props if they pass m_msgMask. */
    static protected class ER7SegmentHandler implements Handler
    {
        /** destination of the tokens that pass the mask. */
        Properties m_props;

        /** source of the delimiter for each level. */
        EncodingCharacters m_encodingChars;

        /** id of the segment being parsed, and which repetition of it this is. */
        String m_segmentId;
        int m_segmentRepIdx;

        /** only tokens whose DatumPath starts with one of these are stored. */
        Vector /*<DatumPath>*/ m_msgMask;

        public int specDepth() {return 4;}

        /** @return the field separator for level 0, the repetition separator
        for level 1, the component separator for level 2 and the subcomponent
        separator for level 3.
        @throws java.lang.Error for any other level. */
        public char delim(int level)
        {
            switch(level) {
                case 0:
                    return m_encodingChars.getFieldSeparator();
                case 1:
                    return m_encodingChars.getRepetitionSeparator();
                case 2:
                    return m_encodingChars.getComponentSeparator();
                case 3:
                    return m_encodingChars.getSubcomponentSeparator();
                default:
                    throw new java.lang.Error("no delimiter defined for level " + level);
            }
        }

        /** Builds the DatumPath of the token from the segment id, the segment
        repetition index and valNodeKey, and stores value in m_props under that
        path's toString() if the path starts with any entry of m_msgMask. */
        public void putDatum(Vector /*<Integer>*/ valNodeKey, String value)
        {
            // make a DatumPath from valNodeKey and info in this:
            DatumPath valDatumPath = new DatumPath();
            valDatumPath.add(m_segmentId).add(m_segmentRepIdx);
            for(int i=0; i<valNodeKey.size(); ++i) {
                // valNodeKey: everything counts from 0 -- not so with DatumPath ... sigh.
                // Only the entry at index 1 (the field repetition) is passed on
                // unchanged; every other entry is shifted up by one.
                int itval = ((Integer)valNodeKey.get(i)).intValue();
                valDatumPath.add(new Integer(i == 1 ? itval : itval+1));
            }

            // see if valDatumPath passes m_msgMask:
            boolean valDatumPathPassesMask = false;
            for(Iterator maskIt = m_msgMask.iterator();
                !valDatumPathPassesMask && maskIt.hasNext(); )
            {
                valDatumPathPassesMask = valDatumPath.startsWith((DatumPath)(maskIt.next()));
            }

            if(valDatumPathPassesMask)
                m_props.setProperty(valDatumPath.toString(), value);
        }
    }

    /** recursively tokenize "guts" (a segment, or part of one) into tokens,
    according to separators (aka delimiters) which are different at each level
    of recursion, and to a recursive depth which is discovered through "handler"
    via handler.delim(int) and handler.specDepth()  As tokens are found, they
    are reported to handler via handler.putDatum(), which presumably stashes them
    away somewhere.  We tell the handler about the location in the message via
    putDatum()'s key argument, which is a List of Integers representing the
    position in the parse tree (size() == depth of recursion).

    The last element of nodeKey is the index of the token currently being looked
    at on this level; it is advanced here for every delimiter seen, and is left
    at its advanced value when this method returns.  Elements that are appended
    for deeper levels are removed again before returning.

    @param handler supplies delimiters and depth, and receives the tokens.
    @param guts the text to tokenize on this level.
    @param nodeKey Vector of Integer; must be non-empty.  Its size selects the
    level (size - 1 is passed to handler.delim()).

    TODO: say more.
    */
    protected static void parseSegmentGuts(/*in/out*/ Handler handler,
        /*in*/ String guts, /*in*/Vector/*<Integer>*/ nodeKey)
    {
        char thisDepthsDelim = handler.delim(nodeKey.size()-1);

        // returnDelims == true: the delimiters come back as tokens too, which is
        // how empty fields / components / whatevers get counted.
        StringTokenizer gutsTokenizer
            = new StringTokenizer(guts, String.valueOf(thisDepthsDelim), true);
        while(gutsTokenizer.hasMoreTokens()) {
            String gutsToken = gutsTokenizer.nextToken();

            if(gutsToken.charAt(0) == thisDepthsDelim) {
                // gutsToken is all delims -- skipping over as many fields or
                // components or whatevers as there are characters in the token:
                int lastIdx = nodeKey.size()-1;
                int oldvalue = ((Integer)nodeKey.get(lastIdx)).intValue();
                nodeKey.set(lastIdx, new Integer(oldvalue + gutsToken.length()));
            }
            else if(nodeKey.size() < handler.specDepth()) {
                // not at the deepest level yet: descend, then undo the add.
                nodeKey.add(new Integer(0));
                parseSegmentGuts(handler, gutsToken, nodeKey);
                nodeKey.setSize(nodeKey.size()-1);
            }
            else {
                handler.putDatum(nodeKey, gutsToken);
            }
        }
    }

    /** Small manual test driver: echoes args[0] to standard output, parses it
    as a message with a mask that lets everything pass, prints the result of
    parseMessage() to standard error and lists the resulting Properties on
    standard output.  Does nothing when no argument is given. */
    public static void main(String args[])
    {
        if(args.length >= 1) {
            //String message = "MSH|^~\\&||||foo|foo|foo";
            System.out.println(args[0]);

            Properties props = new Properties();

            Vector msgMask = new Vector();
            msgMask.add(new DatumPath());

            System.err.println("ER7.parseMessage returned " + parseMessage(props, msgMask, args[0]));
            props.list(System.out);
        }
    }

}