001 /* 002 * Hl7InputStreamReader.java 003 */ 004 005 package ca.uhn.hl7v2.util; 006 007 import java.io.BufferedReader; 008 import java.io.FileNotFoundException; 009 import java.io.IOException; 010 import java.io.InputStream; 011 import java.io.InputStreamReader; 012 import java.io.PushbackReader; 013 import java.io.Reader; 014 import java.util.ArrayList; 015 import java.util.regex.Matcher; 016 import java.util.regex.Pattern; 017 018 import org.apache.log4j.Logger; 019 020 import ca.uhn.hl7v2.parser.Parser; 021 import ca.uhn.hl7v2.parser.PipeParser; 022 023 024 /** 025 * Reads HL7 messages from an InputStream 026 * 027 * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $ 028 */ 029 public class Hl7InputStreamReader { 030 031 private static final Logger ourLog = Logger.getLogger(Hl7InputStreamReader.class); 032 033 private InputStream is = null; 034 035 036 037 /** 038 * Reads HL7 messages from an InputStream and outputs an array of HL7 message strings 039 * 040 * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $ 041 */ 042 public static String[] read( InputStream theMsgInputStream ) 043 throws FileNotFoundException, IOException 044 { 045 Parser hapiParser = new PipeParser(); 046 047 BufferedReader in = 048 new BufferedReader( 049 new CommentFilterReader( new InputStreamReader( theMsgInputStream ) ) 050 ); 051 052 StringBuffer rawMsgBuffer = new StringBuffer(); 053 054 int c = 0; 055 while( (c = in.read()) >= 0) { 056 rawMsgBuffer.append( (char) c); 057 } 058 059 String[] messages = getHL7Messages(rawMsgBuffer.toString()); 060 061 ourLog.info(messages.length + " messages sent."); 062 063 return messages; 064 065 } 066 067 068 069 /** 070 * Given a string that contains HL7 messages, and possibly other junk, 071 * returns an array of the HL7 messages. 072 * An attempt is made to recognize segments even if there is other 073 * content between segments, for example if a log file logs segments 074 * individually with timestamps between them. 075 * 076 * @param theSource a string containing HL7 messages 077 * @return the HL7 messages contained in theSource 078 */ 079 private static String[] getHL7Messages(String theSource) { 080 ArrayList messages = new ArrayList(20); 081 Pattern startPattern = Pattern.compile("^MSH", Pattern.MULTILINE); 082 Matcher startMatcher = startPattern.matcher(theSource); 083 084 while (startMatcher.find()) { 085 String messageExtent = 086 getMessageExtent(theSource.substring(startMatcher.start()), startPattern); 087 088 char fieldDelim = messageExtent.charAt(3); 089 Pattern segmentPattern = Pattern.compile("^[A-Z]{3}\\" + fieldDelim + ".*$", Pattern.MULTILINE); 090 Matcher segmentMatcher = segmentPattern.matcher(messageExtent); 091 StringBuffer msg = new StringBuffer(); 092 while (segmentMatcher.find()) { 093 msg.append(segmentMatcher.group().trim()); 094 msg.append('\r'); 095 } 096 messages.add(msg.toString()); 097 } 098 return (String[]) messages.toArray(new String[0]); 099 } 100 101 /** 102 * Given a string that contains at least one HL7 message, returns the 103 * smallest string that contains the first of these messages. 104 */ 105 private static String getMessageExtent(String theSource, Pattern theStartPattern) { 106 Matcher startMatcher = theStartPattern.matcher(theSource); 107 if (!startMatcher.find()) { 108 throw new IllegalArgumentException(theSource + "does not contain message start pattern" 109 + theStartPattern.toString()); 110 } 111 112 int start = startMatcher.start(); 113 int end = theSource.length(); 114 if (startMatcher.find()) { 115 end = startMatcher.start(); 116 } 117 118 return theSource.substring(start, end).trim(); 119 } 120 121 122 123 /** 124 * TODO: this code is copied from HAPI ... should make it part of HAPI public API instead 125 * Removes C and C++ style comments from a reader stream. C style comments are 126 * distinguished from URL protocol delimiters by the preceding colon in the 127 * latter. 128 */ 129 private static class CommentFilterReader extends PushbackReader { 130 131 private final char[] startCPPComment = {'/', '*'}; 132 private final char[] endCPPComment = {'*', '/'}; 133 private final char[] startCComment = {'/', '/'}; 134 private final char[] endCComment = {'\n'}; 135 private final char[] protocolDelim = {':', '/', '/'}; 136 137 public CommentFilterReader(Reader in) { 138 super(in, 5); 139 } 140 141 /** 142 * Returns the next character, not including comments. 143 */ 144 public int read() throws IOException { 145 if (atSequence(protocolDelim)) { 146 //proceed normally 147 } else if (atSequence(startCPPComment)) { 148 //skip() doesn't seem to work for some reason 149 while (!atSequence(endCPPComment)) super.read(); 150 for (int i = 0; i < endCPPComment.length; i++) super.read(); 151 } else if (atSequence(startCComment)) { 152 while (!atSequence(endCComment)) super.read(); 153 for (int i = 0; i < endCComment.length; i++) super.read(); 154 } 155 int ret = super.read(); 156 if (ret == 65535) ret = -1; 157 return ret; 158 } 159 160 public int read(char[] cbuf, int off, int len) throws IOException { 161 int i = -1; 162 boolean done = false; 163 while (++i < len) { 164 int next = read(); 165 if (next == 65535 || next == -1) { //Pushback causes -1 to convert to 65535 166 done = true; 167 break; 168 } 169 cbuf[off + i] = (char) next; 170 } 171 if (i == 0 && done) i = -1; 172 return i; 173 } 174 175 /** 176 * Tests incoming data for match with char sequence, resets reader when done. 177 */ 178 private boolean atSequence(char[] sequence) throws IOException { 179 boolean result = true; 180 int i = -1; 181 int[] data = new int[sequence.length]; 182 while (++i < sequence.length && result == true) { 183 data[i] = super.read(); 184 if ((char) data[i] != sequence[i]) result = false; //includes case where end of stream reached 185 } 186 for (int j = i-1; j >= 0; j--) { 187 this.unread(data[j]); 188 } 189 return result; 190 } 191 } 192 193 194 }