001/**
002The contents of this file are subject to the Mozilla Public License Version 1.1 
003(the "License"); you may not use this file except in compliance with the License. 
004You may obtain a copy of the License at http://www.mozilla.org/MPL/ 
005Software distributed under the License is distributed on an "AS IS" basis, 
006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 
007specific language governing rights and limitations under the License. 
008
009The Initial Developer of the Original Code is University Health Network. Copyright (C) 
0102001.  All Rights Reserved. 
011
012Contributor(s): Jens Kristian Villadsen from Cetrea A/S
013
014Alternatively, the contents of this file may be used under the terms of the 
015GNU General Public License (the "GPL"), in which case the provisions of the GPL are 
016applicable instead of those above.  If you wish to allow use of your version of this 
017file only under the terms of the GPL and not to allow others to use your version 
018of this file under the MPL, indicate your decision by deleting  the provisions above 
019and replace  them with the notice and other provisions required by the GPL License.  
020If you do not delete the provisions above, a recipient may use your version of 
021this file under either the MPL or the GPL. 
022
023*/
024
025package ca.uhn.hl7v2.llp;
026
027import static ca.uhn.hl7v2.llp.MinLLPReader.*;
028
029import java.io.ByteArrayInputStream;
030import java.io.ByteArrayOutputStream;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.InputStreamReader;
034import java.net.SocketException;
035import java.net.SocketTimeoutException;
036import java.nio.charset.Charset;
037
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import ca.uhn.hl7v2.preparser.PreParser;
042
043/**
044 * Charset-aware MLLP stream reader
045 * 
046 * @see ExtendedMinLowerLayerProtocol
047 * @author Jens Kristian Villadsen from Cetrea A/S
048 */
049public class ExtendedMinLLPReader implements HL7Reader
050{
051
052        private static final Logger log = LoggerFactory.getLogger(ExtendedMinLLPReader.class);
053
054        private InputStream inputStream;
055        private Charset myLastCharset;
056        private InputStreamReader myReader;
057
058        /**
059         * Creates a MinLLPReader with no setup - setInputStream must be set later.
060         */
061        public ExtendedMinLLPReader()
062        {
063                super();
064        }
065
066        /**
067         * Creates a MinLLPReader which reads from the given InputStream. The stream is assumed to be an ASCII bit stream.
068         */
069        public ExtendedMinLLPReader(InputStream in) throws IOException
070        {
071                setInputStream(in);
072        }
073
074        /**
075         * Closes the underlying BufferedReader.
076         */
077        public synchronized void close() throws java.io.IOException
078        {
079                myReader.close();
080        }
081
082        private Charset getCharacterEncoding(InputStream in) throws IOException
083        {
084                ByteArrayOutputStream bos = new ByteArrayOutputStream();
085                int next = in.read();
086                while((next != -1 || bos.size() == 0) && next != END_MESSAGE && next != LAST_CHARACTER)
087                {
088                        bos.write(next);
089                        next = in.read();
090                }
091                bos.flush();
092                
093                try
094                {
095                        String firstLine;
096                        if ((bos.toByteArray()[0] == -2 && bos.toByteArray()[1] == -1) ||
097                                        bos.toByteArray()[1] == -2 && bos.toByteArray()[0] == -1) {
098                                
099                                // if the string is little endian, then we will be missing the second byte of the 
100                                // last character (a "\r"), so add it manually
101                                if (bos.toByteArray()[1] == -2 && bos.toByteArray()[0] == -1) {
102                                        bos.write(0);
103                                }
104                                
105                                firstLine = bos.toString("UTF-16");
106                        } else {
107                                firstLine = bos.toString("US-ASCII");
108                        }
109                        
110                        String[] fields = PreParser.getFields(firstLine, "MSH-18(0)");
111                        String charset = stripNonLowAscii(fields[0]);
112                        Charset javaCs = CharSetUtil.convertHL7CharacterEncodingToCharSetvalue(charset);                        
113                        log.debug("Detected MSH-18 value \"{}\" so using charset {}", charset, javaCs.displayName());                   
114                        return javaCs;
115                }
116                catch(Exception e)
117                {
118                        log.warn("Nonvalid charset - defaulting to US-ASCII", e);
119                }
120                finally
121                {
122                        bos.close();
123                }
124                return Charset.forName("US-ASCII");
125        }
126
127        private String stripNonLowAscii(String theString) {
128                if (theString == null) return "";
129                StringBuilder b = new StringBuilder();
130                
131                for (int i = 0; i < theString.length(); i++) {
132                        char next = theString.charAt(i);
133                        if (next > 0 && next < 127) {
134                                b.append(next);
135                        }
136                }
137                
138                return b.toString();
139        }
140
141        /**
142         * @return the lastCharset
143         */
144        public Charset getLastCharset() {
145                return myLastCharset;
146        }
147
148        public synchronized String getMessage() throws LLPException, IOException
149        {
150                ByteArrayOutputStream baos = null;
151                baos = verifyAndCopyToOutputStream(this.inputStream);
152
153                if(baos == null)
154                        return null;
155                
156                byte[] byteArray = baos.toByteArray();
157                myLastCharset = getCharacterEncoding(new ByteArrayInputStream(byteArray));
158
159                myReader = new InputStreamReader(new ByteArrayInputStream(byteArray), myLastCharset);
160                baos.close();
161
162                StringBuffer s_buffer = new StringBuffer();
163
164                int c = myReader.read();
165                while(c != -1)
166                {
167                        s_buffer.append((char) c);
168                        c = myReader.read();
169                }
170                return s_buffer.toString();
171        }
172
173        /**
174         * Sets the InputStream from which to read messages. The InputStream must be set before any calls to <code>getMessage()</code>.
175         */
176        public synchronized void setInputStream(InputStream in) throws IOException
177        {
178                this.inputStream = in;
179        }
180
181        private ByteArrayOutputStream verifyAndCopyToOutputStream(InputStream stream) throws IOException, LLPException
182        {
183                ByteArrayOutputStream bos = new ByteArrayOutputStream();
184                boolean end_of_message = false;
185
186                int c = 0;
187                try
188                {
189                        c = stream.read();
190                }
191                catch(SocketException e)
192                {
193                        log.info("SocketException on read() attempt.  Socket appears to have been closed: {}", e.getMessage());
194                        throw e;
195                }
196                catch(SocketTimeoutException e)
197                {
198                        log.debug("SocketTimeoutException on read() attempt.");
199                        return null;
200                }
201                // trying to read when there is no data (stream may have been closed at other end)
202                if(c == -1)
203                {
204                        log.info("End of input stream reached.");
205                        throw new SocketException("End of input stream reached");
206                }
207                LowerLayerProtocol.logCharacterReceived(c);
208
209                if(c != START_MESSAGE)
210                {
211                        throw new LLPException("Message violates the " + "minimal lower layer protocol: no start of message indicator " + "received. Received: " + c);
212                }
213
214                while(!end_of_message)
215                {
216                        c = stream.read();
217
218                        if(c == -1)
219                        {
220                                throw new LLPException("Message violates the " + "minimal lower protocol: message terminated without " + "a terminating character.");
221                        }
222                        LowerLayerProtocol.logCharacterReceived(c);
223
224                        if(c == END_MESSAGE)
225                        {
226                                // subsequent character should be a carriage return
227                                c = stream.read();
228                                if(c >= 0)
229                                        LowerLayerProtocol.logCharacterReceived(c);
230                                if(c != LAST_CHARACTER)
231                                {
232                                        throw new LLPException("Message " + "violates the minimal lower layer protocol: " + "message terminator not followed by a return " + "character.");
233                                }
234                                end_of_message = true;
235                        }
236                        else
237                        {
238                                // the character wasn't the end of message, append it to the message
239                                bos.write(c);
240                        }
241                }
242
243                bos.flush();
244                return bos;
245        }
246}