001    /*
002     * Hl7InputStreamReader.java
003     */
004    
005    package ca.uhn.hl7v2.util;
006    
007    import java.io.BufferedReader;
008    import java.io.FileNotFoundException;
009    import java.io.IOException;
010    import java.io.InputStream;
011    import java.io.InputStreamReader;
012    import java.io.PushbackReader;
013    import java.io.Reader;
014    import java.util.ArrayList;
015    import java.util.List;
016    import java.util.regex.Matcher;
017    import java.util.regex.Pattern;
018    
019    import org.slf4j.Logger;
020    import org.slf4j.LoggerFactory;
021    
022    
023    /**
024     * Reads HL7 messages from an InputStream
025     * 
026     * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
027     * @deprecated see {@link Hl7InputStreamMessageIterator} or {@link Hl7InputStreamMessageStringIterator}
028     */
029    public class Hl7InputStreamReader {
030        
031       private static final Logger ourLog = LoggerFactory.getLogger(Hl7InputStreamReader.class);
032    
033        
034       /**
035        * Reads HL7 messages from an InputStream and outputs an array of HL7 message strings
036        * 
037        * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
038        */
039        public static String[] read( InputStream theMsgInputStream )
040         throws FileNotFoundException, IOException
041        {        
042            BufferedReader in =
043                new BufferedReader( 
044                    new CommentFilterReader( new InputStreamReader( theMsgInputStream ) )
045                );
046                    
047            StringBuffer rawMsgBuffer = new StringBuffer();
048            
049            int c = 0;
050                    while( (c = in.read()) >= 0) {
051                            rawMsgBuffer.append( (char) c);
052                    }
053                    
054                    String[] messages = getHL7Messages(rawMsgBuffer.toString());
055            
056            ourLog.info(messages.length + " messages sent."); 
057            
058            return messages;     
059            
060        }
061        
062       
063        
064            /** 
065             * Given a string that contains HL7 messages, and possibly other junk, 
066             * returns an array of the HL7 messages.  
067             * An attempt is made to recognize segments even if there is other 
068             * content between segments, for example if a log file logs segments 
069             * individually with timestamps between them.  
070             * 
071             * @param theSource a string containing HL7 messages 
072             * @return the HL7 messages contained in theSource
073             */
074            private static String[] getHL7Messages(String theSource) {
075                    List<String> messages = new ArrayList<String>(20); 
076                    Pattern startPattern = Pattern.compile("^MSH", Pattern.MULTILINE);
077                    Matcher startMatcher = startPattern.matcher(theSource);
078    
079                    while (startMatcher.find()) {
080                            String messageExtent = 
081                                    getMessageExtent(theSource.substring(startMatcher.start()), startPattern);
082                            
083                            char fieldDelim = messageExtent.charAt(3);
084                            Pattern segmentPattern = Pattern.compile("^[A-Z]{3}\\" + fieldDelim + ".*$", Pattern.MULTILINE);
085                            Matcher segmentMatcher = segmentPattern.matcher(messageExtent);
086                            StringBuffer msg = new StringBuffer();
087                            while (segmentMatcher.find()) {
088                                    msg.append(segmentMatcher.group().trim());
089                                    msg.append('\r');
090                            }
091                            messages.add(msg.toString());
092                    }
093                    return messages.toArray(new String[0]);
094            }
095        
096            /** 
097             * Given a string that contains at least one HL7 message, returns the 
098             * smallest string that contains the first of these messages.  
099             */
100            private static String getMessageExtent(String theSource, Pattern theStartPattern) {
101                    Matcher startMatcher = theStartPattern.matcher(theSource);
102                    if (!startMatcher.find()) {
103                            throw new IllegalArgumentException(theSource + "does not contain message start pattern" 
104                                    + theStartPattern.toString());
105                    }
106            
107                    int start = startMatcher.start();
108                    int end = theSource.length();
109                    if (startMatcher.find()) {
110                            end = startMatcher.start();
111                    }
112            
113                    return theSource.substring(start, end).trim();
114            }
115        
116        
117    
118            /**
119             * TODO: this code is copied from HAPI ... should make it part of HAPI public API instead
120             * Removes C and C++ style comments from a reader stream.  C style comments are
121             * distinguished from URL protocol delimiters by the preceding colon in the
122             * latter.
123             */
124            private static class CommentFilterReader extends PushbackReader {
125            
126                    private final char[] startCPPComment = {'/', '*'};
127                    private final char[] endCPPComment = {'*', '/'};
128                    private final char[] startCComment = {'/', '/'};
129                    private final char[] endCComment = {'\n'};
130                    private final char[] protocolDelim = {':', '/', '/'};
131            
132                    public CommentFilterReader(Reader in) {
133                            super(in, 5);
134                    }
135            
136                    /**
137                     * Returns the next character, not including comments.
138                     */
139                    public int read() throws IOException {
140                            if (atSequence(protocolDelim)) {
141                                    //proceed normally
142                            } else if (atSequence(startCPPComment)) {
143                                    //skip() doesn't seem to work for some reason
144                                    while (!atSequence(endCPPComment)) super.read();
145                                    for (int i = 0; i < endCPPComment.length; i++) super.read();
146                            } else if (atSequence(startCComment)) {
147                                    while (!atSequence(endCComment)) super.read();
148                                    for (int i = 0; i < endCComment.length; i++) super.read();
149                            }
150                            int ret = super.read();
151                            if (ret == 65535) ret = -1;
152                            return ret;            
153                    }
154                    
155                    public int read(char[] cbuf, int off, int len) throws IOException {
156                            int i = -1;
157                            boolean done = false;
158                            while (++i < len) {
159                                    int next = read();
160                                    if (next == 65535 || next == -1) { //Pushback causes -1 to convert to 65535
161                                            done = true;
162                                            break;  
163                                    }
164                                    cbuf[off + i] = (char) next;
165                            }
166                            if (i == 0 && done) i = -1; 
167                            return i; 
168                    }            
169            
170                    /**
171                     * Tests incoming data for match with char sequence, resets reader when done.
172                     */
173                    private boolean atSequence(char[] sequence) throws IOException {
174                            boolean result = true;
175                            int i = -1;
176                            int[] data = new int[sequence.length];
177                            while (++i < sequence.length && result == true) {
178                                    data[i] = super.read();
179                                    if ((char) data[i] != sequence[i]) result = false; //includes case where end of stream reached
180                            }
181                            for (int j = i-1; j >= 0; j--) {
182                                    this.unread(data[j]);
183                            }
184                            return result;
185                    }        
186            }
187        
188    
189    }