001/*
002 * Hl7InputStreamReader.java
003 */
004
005package ca.uhn.hl7v2.util;
006
007import java.io.BufferedReader;
008import java.io.FileNotFoundException;
009import java.io.IOException;
010import java.io.InputStream;
011import java.io.InputStreamReader;
012import java.io.PushbackReader;
013import java.io.Reader;
014import java.util.ArrayList;
015import java.util.List;
016import java.util.regex.Matcher;
017import java.util.regex.Pattern;
018
019import org.slf4j.Logger;
020import org.slf4j.LoggerFactory;
021
022
023/**
024 * Reads HL7 messages from an InputStream
025 * 
026 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
027 * @deprecated see {@link Hl7InputStreamMessageIterator} or {@link Hl7InputStreamMessageStringIterator}
028 */
029public class Hl7InputStreamReader {
030    
031   private static final Logger ourLog = LoggerFactory.getLogger(Hl7InputStreamReader.class);
032
033    
034   /**
035    * Reads HL7 messages from an InputStream and outputs an array of HL7 message strings
036    * 
037    * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
038    */
039    public static String[] read( InputStream theMsgInputStream )
040     throws FileNotFoundException, IOException
041    {        
042        BufferedReader in =
043            new BufferedReader( 
044                new CommentFilterReader( new InputStreamReader( theMsgInputStream ) )
045            );
046                
047        StringBuffer rawMsgBuffer = new StringBuffer();
048        
049        int c = 0;
050                while( (c = in.read()) >= 0) {
051                        rawMsgBuffer.append( (char) c);
052                }
053                
054                String[] messages = getHL7Messages(rawMsgBuffer.toString());
055        
056        ourLog.info(messages.length + " messages sent."); 
057        
058        return messages;     
059        
060    }
061    
062   
063    
064        /** 
065         * Given a string that contains HL7 messages, and possibly other junk, 
066         * returns an array of the HL7 messages.  
067         * An attempt is made to recognize segments even if there is other 
068         * content between segments, for example if a log file logs segments 
069         * individually with timestamps between them.  
070         * 
071         * @param theSource a string containing HL7 messages 
072         * @return the HL7 messages contained in theSource
073         */
074        private static String[] getHL7Messages(String theSource) {
075                List<String> messages = new ArrayList<String>(20); 
076                Pattern startPattern = Pattern.compile("^MSH", Pattern.MULTILINE);
077                Matcher startMatcher = startPattern.matcher(theSource);
078
079                while (startMatcher.find()) {
080                        String messageExtent = 
081                                getMessageExtent(theSource.substring(startMatcher.start()), startPattern);
082                        
083                        char fieldDelim = messageExtent.charAt(3);
084                        Pattern segmentPattern = Pattern.compile("^[A-Z]{3}\\" + fieldDelim + ".*$", Pattern.MULTILINE);
085                        Matcher segmentMatcher = segmentPattern.matcher(messageExtent);
086                        StringBuffer msg = new StringBuffer();
087                        while (segmentMatcher.find()) {
088                                msg.append(segmentMatcher.group().trim());
089                                msg.append('\r');
090                        }
091                        messages.add(msg.toString());
092                }
093                return messages.toArray(new String[0]);
094        }
095    
096        /** 
097         * Given a string that contains at least one HL7 message, returns the 
098         * smallest string that contains the first of these messages.  
099         */
100        private static String getMessageExtent(String theSource, Pattern theStartPattern) {
101                Matcher startMatcher = theStartPattern.matcher(theSource);
102                if (!startMatcher.find()) {
103                        throw new IllegalArgumentException(theSource + "does not contain message start pattern" 
104                                + theStartPattern.toString());
105                }
106        
107                int start = startMatcher.start();
108                int end = theSource.length();
109                if (startMatcher.find()) {
110                        end = startMatcher.start();
111                }
112        
113                return theSource.substring(start, end).trim();
114        }
115    
116    
117
118        /**
119         * TODO: this code is copied from HAPI ... should make it part of HAPI public API instead
120         * Removes C and C++ style comments from a reader stream.  C style comments are
121         * distinguished from URL protocol delimiters by the preceding colon in the
122         * latter.
123         */
124        private static class CommentFilterReader extends PushbackReader {
125        
126                private final char[] startCPPComment = {'/', '*'};
127                private final char[] endCPPComment = {'*', '/'};
128                private final char[] startCComment = {'/', '/'};
129                private final char[] endCComment = {'\n'};
130                private final char[] protocolDelim = {':', '/', '/'};
131        
132                public CommentFilterReader(Reader in) {
133                        super(in, 5);
134                }
135        
136                /**
137                 * Returns the next character, not including comments.
138                 */
139                public int read() throws IOException {
140                        if (atSequence(protocolDelim)) {
141                                //proceed normally
142                        } else if (atSequence(startCPPComment)) {
143                                //skip() doesn't seem to work for some reason
144                                while (!atSequence(endCPPComment)) super.read();
145                                for (int i = 0; i < endCPPComment.length; i++) super.read();
146                        } else if (atSequence(startCComment)) {
147                                while (!atSequence(endCComment)) super.read();
148                                for (int i = 0; i < endCComment.length; i++) super.read();
149                        }
150                        int ret = super.read();
151                        if (ret == 65535) ret = -1;
152                        return ret;            
153                }
154                
155                public int read(char[] cbuf, int off, int len) throws IOException {
156                        int i = -1;
157                        boolean done = false;
158                        while (++i < len) {
159                                int next = read();
160                                if (next == 65535 || next == -1) { //Pushback causes -1 to convert to 65535
161                                        done = true;
162                                        break;  
163                                }
164                                cbuf[off + i] = (char) next;
165                        }
166                        if (i == 0 && done) i = -1; 
167                        return i; 
168                }            
169        
170                /**
171                 * Tests incoming data for match with char sequence, resets reader when done.
172                 */
173                private boolean atSequence(char[] sequence) throws IOException {
174                        boolean result = true;
175                        int i = -1;
176                        int[] data = new int[sequence.length];
177                        while (++i < sequence.length && result == true) {
178                                data[i] = super.read();
179                                if ((char) data[i] != sequence[i]) result = false; //includes case where end of stream reached
180                        }
181                        for (int j = i-1; j >= 0; j--) {
182                                this.unread(data[j]);
183                        }
184                        return result;
185                }        
186        }
187    
188
189}