001/**
002The contents of this file are subject to the Mozilla Public License Version 1.1 
003(the "License"); you may not use this file except in compliance with the License. 
004You may obtain a copy of the License at http://www.mozilla.org/MPL/ 
005Software distributed under the License is distributed on an "AS IS" basis, 
006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 
007specific language governing rights and limitations under the License. 
008
009The Original Code is "Escape.java".  Description: 
010"Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
011defined in section 2.10 of the standard (version 2.4)" 
012
013The Initial Developer of the Original Code is University Health Network. Copyright (C) 
0142001.  All Rights Reserved. 
015
016Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 
017
018Alternatively, the contents of this file may be used under the terms of the 
GNU General Public License (the "GPL"), in which case the provisions of the GPL are 
020applicable instead of those above.  If you wish to allow use of your version of this 
021file only under the terms of the GPL and not to allow others to use your version 
022of this file under the MPL, indicate your decision by deleting  the provisions above 
023and replace  them with the notice and other provisions required by the GPL License.  
024If you do not delete the provisions above, a recipient may use your version of 
025this file under either the MPL or the GPL. 
026 */
027package ca.uhn.hl7v2.parser;
028
029import java.util.Collections;
030import java.util.LinkedHashMap;
031import java.util.Map;
032
033/**
034 * Handles "escaping" and "unescaping" of text according to the HL7 escape
035 * sequence rules defined in section 2.10 of the standard (version 2.4).
036 * Currently, escape sequences for multiple character sets are unsupported. The
 * highlighting, hexadecimal, and locally defined escape sequences are also
038 * unsupported.
039 * 
040 * @author Bryan Tripp
041 * @author Mark Lee (Skeva Technologies)
042 * @author Elmar Hinz
043 * @author Christian Ohr
044 */
045public class Escape {
046
047    /**
048     * limits the size of variousEncChars to 1000, can be overridden by system property.
049     */
050    private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) {
051
052        private static final long serialVersionUID = 1L;
053        final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
054
055        @Override
056        protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
057            return this.size() > maxSize;
058        }
059    });
060
061    /** Creates a new instance of Escape */
062    public Escape() {
063    }
064
065    public static String escape(String text, EncodingCharacters encChars) {
066        EncLookup esc = getEscapeSequences(encChars);
067        int textLength = text.length();
068
069        StringBuilder result = new StringBuilder(textLength);
070        for (int i = 0; i < textLength; i++) {
071            boolean charReplaced = false;
072            char c = text.charAt(i);
073
074            FORENCCHARS:
075                        for (int j = 0; j < 6; j++) {
076                if (text.charAt(i) == esc.characters[j]) {
077
078                                        // Formatting escape sequences such as /.br/ should be left alone
079                                        if (j == 4) {
080                                                
081                                                if (i+1 < textLength) {
082                                                        
083                                                        // Check for \.br\
084                                                        char nextChar = text.charAt(i + 1);
085                                                        switch (nextChar) {
086                                                        case '.':
087                                                        case 'C':
088                                                        case 'M':
089                                                        case 'X':
090                                                        case 'Z':
091                                                        {
092                                                                int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
093                                                                if (nextEscapeIndex > 0) {
094                                                                        result.append(text.substring(i, nextEscapeIndex + 1));
095                                                                        charReplaced = true;
096                                                                        i = nextEscapeIndex;
097                                                                        break FORENCCHARS;
098                                                                }
099                                                                break;
100                                                        }
101                                                        case 'H':
102                                                        case 'N':
103                                                        {
104                                                                if (i+2 < textLength && text.charAt(i+2) == '\\') {
105                                                                        int nextEscapeIndex = i + 2;
106                                                                        if (nextEscapeIndex > 0) {
107                                                                                result.append(text.substring(i, nextEscapeIndex + 1));
108                                                                                charReplaced = true;
109                                                                                i = nextEscapeIndex;
110                                                                                break FORENCCHARS;
111                                                                        }
112                                                                }
113                                                                break;
114                                                        }
115                                                        }
116                                                        
117                                                }
118                                                
119                                        }
120
121                    result.append(esc.encodings[j]);
122                    charReplaced = true;
123                    break;
124                }
125            }
126            if (!charReplaced) {
127                result.append(c);
128            }
129        }
130        return result.toString();
131    }
132
133    public static String unescape(String text, EncodingCharacters encChars) {
134
135        // If the escape char isn't found, we don't need to look for escape sequences
136        char escapeChar = encChars.getEscapeCharacter();
137        boolean foundEscapeChar = false;
138        for (int i = 0; i < text.length(); i++) {
139            if (text.charAt(i) == escapeChar) {
140                foundEscapeChar = true;
141                break;
142            }
143        }
144        if (foundEscapeChar == false) {
145            return text;
146        }
147
148        int textLength = text.length();
149        StringBuilder result = new StringBuilder(textLength + 20);
150        EncLookup esc = getEscapeSequences(encChars);
151        char escape = esc.characters[4];
152        int encodingsCount = esc.characters.length;
153        int i = 0;
154        while (i < textLength) {
155            char c = text.charAt(i);
156            if (c != escape) {
157                result.append(c);
158                i++;
159            } else {
160                boolean foundEncoding = false;
161
162                                // Test against the standard encodings
163                                for (int j = 0; j < encodingsCount; j++) {
164                    String encoding = esc.encodings[j];
165                                        int encodingLength = encoding.length();
166                                        if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
167                            .equals(encoding)) {
168                        result.append(esc.characters[j]);
169                        i += encodingLength;
170                        foundEncoding = true;
171                        break;
172                    }
173                }
174
175                if (!foundEncoding) {
176                                        
177                                        // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
178                                        // formatting codes. They should be left intact
179                                        if (i + 1 < textLength) {
180                                                char nextChar = text.charAt(i + 1);
181                                                switch (nextChar) {
182                                                        case '.':
183                                                        case 'C':
184                                                        case 'M':
185                                                        case 'X':
186                                                        case 'Z':
187                                                        {
188                                                                int closingEscape = text.indexOf(escape, i + 1);
189                                                                if (closingEscape > 0) {
190                                                                        String substring = text.substring(i, closingEscape + 1);
191                                                                        result.append(substring);
192                                                                        i += substring.length();
193                                                                } else {
194                                                                        i++;
195                                                                }
196                                                                break;
197                                                        }
198                                                        case 'H':
199                                                        case 'N':
200                                                        {
201                                                                int closingEscape = text.indexOf(escape, i + 1);
202                                                                if (closingEscape == i + 2) {
203                                                                        String substring = text.substring(i, closingEscape + 1);
204                                                                        result.append(substring);
205                                                                        i += substring.length();
206                                                                } else {
207                                                                        i++;
208                                                                }
209                                                                break;
210                                                        }
211                                                        default:
212                                                        {
213                                                                i++;
214                                                        }
215                                                }
216                                                
217                                        } else {
218                                                i++;
219                                        }
220                }
221
222
223            }
224        }
225        return result.toString();
226    }
227
228    /**
229     * Returns a HashTable with escape sequences as keys, and corresponding
230     * Strings as values.
231     */
232    private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
233        EncLookup escapeSequences = variousEncChars.get(encChars);
234        if (escapeSequences == null) {
235            // this means we haven't got the sequences for these encoding
236            // characters yet - let's make them
237            escapeSequences = new EncLookup(encChars);
238            variousEncChars.put(encChars, escapeSequences);
239        }
240        return escapeSequences;
241    }
242
243
244
245
246    /**
247     * A performance-optimized replacement for using when
248     * mapping from HL7 special characters to their respective
249     * encodings
250     *
251     * @author Christian Ohr
252     */
253    private static class EncLookup {
254
255        char[] characters = new char[6];
256        String[] encodings = new String[6];
257
258        EncLookup(EncodingCharacters ec) {
259            characters[0] = ec.getFieldSeparator();
260            characters[1] = ec.getComponentSeparator();
261            characters[2] = ec.getSubcomponentSeparator();
262            characters[3] = ec.getRepetitionSeparator();
263            characters[4] = ec.getEscapeCharacter();
264            characters[5] = '\r';
265            char[] codes = {'F', 'S', 'T', 'R', 'E'};
266            for (int i = 0; i < codes.length; i++) {
267                StringBuffer seq = new StringBuffer();
268                seq.append(ec.getEscapeCharacter());
269                seq.append(codes[i]);
270                seq.append(ec.getEscapeCharacter());
271                encodings[i] = seq.toString();
272            }
273            encodings[5] = "\\X000d\\";
274        }
275    }
276}