001 /*
002 * Hl7InputStreamReader.java
003 */
004
005 package ca.uhn.hl7v2.util;
006
007 import java.io.BufferedReader;
008 import java.io.FileNotFoundException;
009 import java.io.IOException;
010 import java.io.InputStream;
011 import java.io.InputStreamReader;
012 import java.io.PushbackReader;
013 import java.io.Reader;
014 import java.util.ArrayList;
015 import java.util.List;
016 import java.util.regex.Matcher;
017 import java.util.regex.Pattern;
018
019 import org.slf4j.Logger;
020 import org.slf4j.LoggerFactory;
021
022
023 /**
024 * Reads HL7 messages from an InputStream
025 *
026 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
027 * @deprecated see {@link Hl7InputStreamMessageIterator} or {@link Hl7InputStreamMessageStringIterator}
028 */
029 public class Hl7InputStreamReader {
030
031 private static final Logger ourLog = LoggerFactory.getLogger(Hl7InputStreamReader.class);
032
033
034 /**
035 * Reads HL7 messages from an InputStream and outputs an array of HL7 message strings
036 *
037 * @version $Revision: 1.1 $ updated on $Date: 2007-02-19 02:24:27 $ by $Author: jamesagnew $
038 */
039 public static String[] read( InputStream theMsgInputStream )
040 throws FileNotFoundException, IOException
041 {
042 BufferedReader in =
043 new BufferedReader(
044 new CommentFilterReader( new InputStreamReader( theMsgInputStream ) )
045 );
046
047 StringBuffer rawMsgBuffer = new StringBuffer();
048
049 int c = 0;
050 while( (c = in.read()) >= 0) {
051 rawMsgBuffer.append( (char) c);
052 }
053
054 String[] messages = getHL7Messages(rawMsgBuffer.toString());
055
056 ourLog.info(messages.length + " messages sent.");
057
058 return messages;
059
060 }
061
062
063
064 /**
065 * Given a string that contains HL7 messages, and possibly other junk,
066 * returns an array of the HL7 messages.
067 * An attempt is made to recognize segments even if there is other
068 * content between segments, for example if a log file logs segments
069 * individually with timestamps between them.
070 *
071 * @param theSource a string containing HL7 messages
072 * @return the HL7 messages contained in theSource
073 */
074 private static String[] getHL7Messages(String theSource) {
075 List<String> messages = new ArrayList<String>(20);
076 Pattern startPattern = Pattern.compile("^MSH", Pattern.MULTILINE);
077 Matcher startMatcher = startPattern.matcher(theSource);
078
079 while (startMatcher.find()) {
080 String messageExtent =
081 getMessageExtent(theSource.substring(startMatcher.start()), startPattern);
082
083 char fieldDelim = messageExtent.charAt(3);
084 Pattern segmentPattern = Pattern.compile("^[A-Z]{3}\\" + fieldDelim + ".*$", Pattern.MULTILINE);
085 Matcher segmentMatcher = segmentPattern.matcher(messageExtent);
086 StringBuffer msg = new StringBuffer();
087 while (segmentMatcher.find()) {
088 msg.append(segmentMatcher.group().trim());
089 msg.append('\r');
090 }
091 messages.add(msg.toString());
092 }
093 return messages.toArray(new String[0]);
094 }
095
096 /**
097 * Given a string that contains at least one HL7 message, returns the
098 * smallest string that contains the first of these messages.
099 */
100 private static String getMessageExtent(String theSource, Pattern theStartPattern) {
101 Matcher startMatcher = theStartPattern.matcher(theSource);
102 if (!startMatcher.find()) {
103 throw new IllegalArgumentException(theSource + "does not contain message start pattern"
104 + theStartPattern.toString());
105 }
106
107 int start = startMatcher.start();
108 int end = theSource.length();
109 if (startMatcher.find()) {
110 end = startMatcher.start();
111 }
112
113 return theSource.substring(start, end).trim();
114 }
115
116
117
118 /**
119 * TODO: this code is copied from HAPI ... should make it part of HAPI public API instead
120 * Removes C and C++ style comments from a reader stream. C style comments are
121 * distinguished from URL protocol delimiters by the preceding colon in the
122 * latter.
123 */
124 private static class CommentFilterReader extends PushbackReader {
125
126 private final char[] startCPPComment = {'/', '*'};
127 private final char[] endCPPComment = {'*', '/'};
128 private final char[] startCComment = {'/', '/'};
129 private final char[] endCComment = {'\n'};
130 private final char[] protocolDelim = {':', '/', '/'};
131
132 public CommentFilterReader(Reader in) {
133 super(in, 5);
134 }
135
136 /**
137 * Returns the next character, not including comments.
138 */
139 public int read() throws IOException {
140 if (atSequence(protocolDelim)) {
141 //proceed normally
142 } else if (atSequence(startCPPComment)) {
143 //skip() doesn't seem to work for some reason
144 while (!atSequence(endCPPComment)) super.read();
145 for (int i = 0; i < endCPPComment.length; i++) super.read();
146 } else if (atSequence(startCComment)) {
147 while (!atSequence(endCComment)) super.read();
148 for (int i = 0; i < endCComment.length; i++) super.read();
149 }
150 int ret = super.read();
151 if (ret == 65535) ret = -1;
152 return ret;
153 }
154
155 public int read(char[] cbuf, int off, int len) throws IOException {
156 int i = -1;
157 boolean done = false;
158 while (++i < len) {
159 int next = read();
160 if (next == 65535 || next == -1) { //Pushback causes -1 to convert to 65535
161 done = true;
162 break;
163 }
164 cbuf[off + i] = (char) next;
165 }
166 if (i == 0 && done) i = -1;
167 return i;
168 }
169
170 /**
171 * Tests incoming data for match with char sequence, resets reader when done.
172 */
173 private boolean atSequence(char[] sequence) throws IOException {
174 boolean result = true;
175 int i = -1;
176 int[] data = new int[sequence.length];
177 while (++i < sequence.length && result == true) {
178 data[i] = super.read();
179 if ((char) data[i] != sequence[i]) result = false; //includes case where end of stream reached
180 }
181 for (int j = i-1; j >= 0; j--) {
182 this.unread(data[j]);
183 }
184 return result;
185 }
186 }
187
188
189 }