001    /**
002    The contents of this file are subject to the Mozilla Public License Version 1.1 
003    (the "License"); you may not use this file except in compliance with the License. 
004    You may obtain a copy of the License at http://www.mozilla.org/MPL/ 
005    Software distributed under the License is distributed on an "AS IS" basis, 
006    WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 
007    specific language governing rights and limitations under the License. 
008    
009    The Original Code is "Escape.java".  Description: 
010    "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
011    defined in section 2.10 of the standard (version 2.4)" 
012    
013    The Initial Developer of the Original Code is University Health Network. Copyright (C) 
014    2001.  All Rights Reserved. 
015    
016    Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 
017    
018    Alternatively, the contents of this file may be used under the terms of the 
019    GNU General Public License (the "GPL"), in which case the provisions of the GPL are 
020    applicable instead of those above.  If you wish to allow use of your version of this 
021    file only under the terms of the GPL and not to allow others to use your version 
022    of this file under the MPL, indicate your decision by deleting  the provisions above 
023    and replace  them with the notice and other provisions required by the GPL License.  
024    If you do not delete the provisions above, a recipient may use your version of 
025    this file under either the MPL or the GPL. 
026     */
027    package ca.uhn.hl7v2.parser;
028    
029    import java.util.Collections;
030    import java.util.LinkedHashMap;
031    import java.util.Map;
032    
033    /**
034     * Handles "escaping" and "unescaping" of text according to the HL7 escape
035     * sequence rules defined in section 2.10 of the standard (version 2.4).
036     * Currently, escape sequences for multiple character sets are unsupported. The
037     * highlighting, hexadecimal, and locally defined escape sequences are also
038     * unsupported.
039     * 
040     * @author Bryan Tripp
041     * @author Mark Lee (Skeva Technologies)
042     * @author Elmar Hinz
043     * @author Christian Ohr
044     */
045    public class Escape {
046    
047        /**
048         * limits the size of variousEncChars to 1000, can be overridden by system property.
049         */
050        private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) {
051    
052            private static final long serialVersionUID = 1L;
053            final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
054    
055            @Override
056            protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
057                return this.size() > maxSize;
058            }
059        });
060    
061        /** Creates a new instance of Escape */
062        public Escape() {
063        }
064    
065        public static String escape(String text, EncodingCharacters encChars) {
066            EncLookup esc = getEscapeSequences(encChars);
067            int textLength = text.length();
068    
069            StringBuilder result = new StringBuilder(textLength);
070            for (int i = 0; i < textLength; i++) {
071                boolean charReplaced = false;
072                char c = text.charAt(i);
073    
074                FORENCCHARS:
075                            for (int j = 0; j < 6; j++) {
076                    if (text.charAt(i) == esc.characters[j]) {
077    
078                                            // Formatting escape sequences such as /.br/ should be left alone
079                                            if (j == 4) {
080                                                    
081                                                    if (i+1 < textLength) {
082                                                            
083                                                            // Check for \.br\
084                                                            char nextChar = text.charAt(i + 1);
085                                                            switch (nextChar) {
086                                                            case '.':
087                                                            case 'C':
088                                                            case 'M':
089                                                            case 'X':
090                                                            case 'Z':
091                                                            {
092                                                                    int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
093                                                                    if (nextEscapeIndex > 0) {
094                                                                            result.append(text.substring(i, nextEscapeIndex + 1));
095                                                                            charReplaced = true;
096                                                                            i = nextEscapeIndex;
097                                                                            break FORENCCHARS;
098                                                                    }
099                                                                    break;
100                                                            }
101                                                            case 'H':
102                                                            case 'N':
103                                                            {
104                                                                    if (i+2 < textLength && text.charAt(i+2) == '\\') {
105                                                                            int nextEscapeIndex = i + 2;
106                                                                            if (nextEscapeIndex > 0) {
107                                                                                    result.append(text.substring(i, nextEscapeIndex + 1));
108                                                                                    charReplaced = true;
109                                                                                    i = nextEscapeIndex;
110                                                                                    break FORENCCHARS;
111                                                                            }
112                                                                    }
113                                                                    break;
114                                                            }
115                                                            }
116                                                            
117                                                    }
118                                                    
119                                            }
120    
121                        result.append(esc.encodings[j]);
122                        charReplaced = true;
123                        break;
124                    }
125                }
126                if (!charReplaced) {
127                    result.append(c);
128                }
129            }
130            return result.toString();
131        }
132    
133        public static String unescape(String text, EncodingCharacters encChars) {
134    
135            // If the escape char isn't found, we don't need to look for escape sequences
136            char escapeChar = encChars.getEscapeCharacter();
137            boolean foundEscapeChar = false;
138            for (int i = 0; i < text.length(); i++) {
139                if (text.charAt(i) == escapeChar) {
140                    foundEscapeChar = true;
141                    break;
142                }
143            }
144            if (foundEscapeChar == false) {
145                return text;
146            }
147    
148            int textLength = text.length();
149            StringBuilder result = new StringBuilder(textLength + 20);
150            EncLookup esc = getEscapeSequences(encChars);
151            char escape = esc.characters[4];
152            int encodingsCount = esc.characters.length;
153            int i = 0;
154            while (i < textLength) {
155                char c = text.charAt(i);
156                if (c != escape) {
157                    result.append(c);
158                    i++;
159                } else {
160                    boolean foundEncoding = false;
161    
162                                    // Test against the standard encodings
163                                    for (int j = 0; j < encodingsCount; j++) {
164                        String encoding = esc.encodings[j];
165                                            int encodingLength = encoding.length();
166                                            if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
167                                .equals(encoding)) {
168                            result.append(esc.characters[j]);
169                            i += encodingLength;
170                            foundEncoding = true;
171                            break;
172                        }
173                    }
174    
175                    if (!foundEncoding) {
176                                            
177                                            // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
178                                            // formatting codes. They should be left intact
179                                            if (i + 1 < textLength) {
180                                                    char nextChar = text.charAt(i + 1);
181                                                    switch (nextChar) {
182                                                            case '.':
183                                                            case 'C':
184                                                            case 'M':
185                                                            case 'X':
186                                                            case 'Z':
187                                                            {
188                                                                    int closingEscape = text.indexOf(escape, i + 1);
189                                                                    if (closingEscape > 0) {
190                                                                            String substring = text.substring(i, closingEscape + 1);
191                                                                            result.append(substring);
192                                                                            i += substring.length();
193                                                                    } else {
194                                                                            i++;
195                                                                    }
196                                                                    break;
197                                                            }
198                                                            case 'H':
199                                                            case 'N':
200                                                            {
201                                                                    int closingEscape = text.indexOf(escape, i + 1);
202                                                                    if (closingEscape == i + 2) {
203                                                                            String substring = text.substring(i, closingEscape + 1);
204                                                                            result.append(substring);
205                                                                            i += substring.length();
206                                                                    } else {
207                                                                            i++;
208                                                                    }
209                                                                    break;
210                                                            }
211                                                            default:
212                                                            {
213                                                                    i++;
214                                                            }
215                                                    }
216                                                    
217                                            } else {
218                                                    i++;
219                                            }
220                    }
221    
222    
223                }
224            }
225            return result.toString();
226        }
227    
228        /**
229         * Returns a HashTable with escape sequences as keys, and corresponding
230         * Strings as values.
231         */
232        private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
233            EncLookup escapeSequences = variousEncChars.get(encChars);
234            if (escapeSequences == null) {
235                // this means we haven't got the sequences for these encoding
236                // characters yet - let's make them
237                escapeSequences = new EncLookup(encChars);
238                variousEncChars.put(encChars, escapeSequences);
239            }
240            return escapeSequences;
241        }
242    
243    
244    
245    
246        /**
247         * A performance-optimized replacement for using when
248         * mapping from HL7 special characters to their respective
249         * encodings
250         *
251         * @author Christian Ohr
252         */
253        private static class EncLookup {
254    
255            char[] characters = new char[6];
256            String[] encodings = new String[6];
257    
258            EncLookup(EncodingCharacters ec) {
259                characters[0] = ec.getFieldSeparator();
260                characters[1] = ec.getComponentSeparator();
261                characters[2] = ec.getSubcomponentSeparator();
262                characters[3] = ec.getRepetitionSeparator();
263                characters[4] = ec.getEscapeCharacter();
264                characters[5] = '\r';
265                char[] codes = {'F', 'S', 'T', 'R', 'E'};
266                for (int i = 0; i < codes.length; i++) {
267                    StringBuffer seq = new StringBuffer();
268                    seq.append(ec.getEscapeCharacter());
269                    seq.append(codes[i]);
270                    seq.append(ec.getEscapeCharacter());
271                    encodings[i] = seq.toString();
272                }
273                encodings[5] = "\\X000d\\";
274            }
275        }
276    }