001    /*
002     * Hl7InputStreamReader.java
003     */
004    
005    package ca.uhn.hl7v2.util;
006    
007    import java.io.BufferedReader;
008    import java.io.FileNotFoundException;
009    import java.io.IOException;
010    import java.io.InputStream;
011    import java.io.InputStreamReader;
012    import java.io.PushbackReader;
013    import java.io.Reader;
014    import java.util.ArrayList;
015    import java.util.regex.Matcher;
016    import java.util.regex.Pattern;
017    
018    import org.apache.log4j.Logger;
019    
020    import ca.uhn.hl7v2.parser.Parser;
021    import ca.uhn.hl7v2.parser.PipeParser;
022    
023    
024    /**
025     * Reads HL7 messages from an InputStream
026     * 
027     * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $
028     */
029    public class Hl7InputStreamReader {
030        
031       private static final Logger ourLog = Logger.getLogger(Hl7InputStreamReader.class);
032        
033       private InputStream is = null;
034        
035        
036        
037       /**
038        * Reads HL7 messages from an InputStream and outputs an array of HL7 message strings
039        * 
040        * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $
041        */
042        public static String[] read( InputStream theMsgInputStream )
043         throws FileNotFoundException, IOException
044        {
045            Parser hapiParser = new PipeParser(); 
046            
047            BufferedReader in =
048                new BufferedReader( 
049                    new CommentFilterReader( new InputStreamReader( theMsgInputStream ) )
050                );
051                    
052            StringBuffer rawMsgBuffer = new StringBuffer();
053            
054            int c = 0;
055                    while( (c = in.read()) >= 0) {
056                            rawMsgBuffer.append( (char) c);
057                    }
058                    
059                    String[] messages = getHL7Messages(rawMsgBuffer.toString());
060            
061            ourLog.info(messages.length + " messages sent."); 
062            
063            return messages;     
064            
065        }
066        
067       
068        
069            /** 
070             * Given a string that contains HL7 messages, and possibly other junk, 
071             * returns an array of the HL7 messages.  
072             * An attempt is made to recognize segments even if there is other 
073             * content between segments, for example if a log file logs segments 
074             * individually with timestamps between them.  
075             * 
076             * @param theSource a string containing HL7 messages 
077             * @return the HL7 messages contained in theSource
078             */
079            private static String[] getHL7Messages(String theSource) {
080                    ArrayList messages = new ArrayList(20); 
081                    Pattern startPattern = Pattern.compile("^MSH", Pattern.MULTILINE);
082                    Matcher startMatcher = startPattern.matcher(theSource);
083    
084                    while (startMatcher.find()) {
085                            String messageExtent = 
086                                    getMessageExtent(theSource.substring(startMatcher.start()), startPattern);
087                            
088                            char fieldDelim = messageExtent.charAt(3);
089                            Pattern segmentPattern = Pattern.compile("^[A-Z]{3}\\" + fieldDelim + ".*$", Pattern.MULTILINE);
090                            Matcher segmentMatcher = segmentPattern.matcher(messageExtent);
091                            StringBuffer msg = new StringBuffer();
092                            while (segmentMatcher.find()) {
093                                    msg.append(segmentMatcher.group().trim());
094                                    msg.append('\r');
095                            }
096                            messages.add(msg.toString());
097                    }
098                    return (String[]) messages.toArray(new String[0]);
099            }
100        
101            /** 
102             * Given a string that contains at least one HL7 message, returns the 
103             * smallest string that contains the first of these messages.  
104             */
105            private static String getMessageExtent(String theSource, Pattern theStartPattern) {
106                    Matcher startMatcher = theStartPattern.matcher(theSource);
107                    if (!startMatcher.find()) {
108                            throw new IllegalArgumentException(theSource + "does not contain message start pattern" 
109                                    + theStartPattern.toString());
110                    }
111            
112                    int start = startMatcher.start();
113                    int end = theSource.length();
114                    if (startMatcher.find()) {
115                            end = startMatcher.start();
116                    }
117            
118                    return theSource.substring(start, end).trim();
119            }
120        
121        
122    
123            /**
124             * TODO: this code is copied from HAPI ... should make it part of HAPI public API instead
125             * Removes C and C++ style comments from a reader stream.  C style comments are
126             * distinguished from URL protocol delimiters by the preceding colon in the
127             * latter.
128             */
129            private static class CommentFilterReader extends PushbackReader {
130            
131                    private final char[] startCPPComment = {'/', '*'};
132                    private final char[] endCPPComment = {'*', '/'};
133                    private final char[] startCComment = {'/', '/'};
134                    private final char[] endCComment = {'\n'};
135                    private final char[] protocolDelim = {':', '/', '/'};
136            
137                    public CommentFilterReader(Reader in) {
138                            super(in, 5);
139                    }
140            
141                    /**
142                     * Returns the next character, not including comments.
143                     */
144                    public int read() throws IOException {
145                            if (atSequence(protocolDelim)) {
146                                    //proceed normally
147                            } else if (atSequence(startCPPComment)) {
148                                    //skip() doesn't seem to work for some reason
149                                    while (!atSequence(endCPPComment)) super.read();
150                                    for (int i = 0; i < endCPPComment.length; i++) super.read();
151                            } else if (atSequence(startCComment)) {
152                                    while (!atSequence(endCComment)) super.read();
153                                    for (int i = 0; i < endCComment.length; i++) super.read();
154                            }
155                            int ret = super.read();
156                            if (ret == 65535) ret = -1;
157                            return ret;            
158                    }
159                    
160                    public int read(char[] cbuf, int off, int len) throws IOException {
161                            int i = -1;
162                            boolean done = false;
163                            while (++i < len) {
164                                    int next = read();
165                                    if (next == 65535 || next == -1) { //Pushback causes -1 to convert to 65535
166                                            done = true;
167                                            break;  
168                                    }
169                                    cbuf[off + i] = (char) next;
170                            }
171                            if (i == 0 && done) i = -1; 
172                            return i; 
173                    }            
174            
175                    /**
176                     * Tests incoming data for match with char sequence, resets reader when done.
177                     */
178                    private boolean atSequence(char[] sequence) throws IOException {
179                            boolean result = true;
180                            int i = -1;
181                            int[] data = new int[sequence.length];
182                            while (++i < sequence.length && result == true) {
183                                    data[i] = super.read();
184                                    if ((char) data[i] != sequence[i]) result = false; //includes case where end of stream reached
185                            }
186                            for (int j = i-1; j >= 0; j--) {
187                                    this.unread(data[j]);
188                            }
189                            return result;
190                    }        
191            }
192        
193    
194    }