001 /**
002 The contents of this file are subject to the Mozilla Public License Version 1.1
003 (the "License"); you may not use this file except in compliance with the License.
004 You may obtain a copy of the License at http://www.mozilla.org/MPL/
005 Software distributed under the License is distributed on an "AS IS" basis,
006 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
007 specific language governing rights and limitations under the License.
008
009 The Original Code is "Escape.java". Description:
010 "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
011 defined in section 2.10 of the standard (version 2.4)"
012
013 The Initial Developer of the Original Code is University Health Network. Copyright (C)
014 2001. All Rights Reserved.
015
016 Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz
017
018 Alternatively, the contents of this file may be used under the terms of the
019 GNU General Public License (the "GPL"), in which case the provisions of the GPL are
020 applicable instead of those above. If you wish to allow use of your version of this
021 file only under the terms of the GPL and not to allow others to use your version
022 of this file under the MPL, indicate your decision by deleting the provisions above
023 and replace them with the notice and other provisions required by the GPL License.
024 If you do not delete the provisions above, a recipient may use your version of
025 this file under either the MPL or the GPL.
026 */
027 package ca.uhn.hl7v2.parser;
028
029 import java.util.Collections;
030 import java.util.LinkedHashMap;
031 import java.util.Map;
032
033 /**
034 * Handles "escaping" and "unescaping" of text according to the HL7 escape
035 * sequence rules defined in section 2.10 of the standard (version 2.4).
036 * Currently, escape sequences for multiple character sets are unsupported. The
037 * highlighting, hexadecimal, and locally defined escape sequences are also
038 * unsupported.
039 *
040 * @author Bryan Tripp
041 * @author Mark Lee (Skeva Technologies)
042 * @author Elmar Hinz
043 * @author Christian Ohr
044 */
045 public class Escape {
046
047 /**
048 * limits the size of variousEncChars to 1000, can be overridden by system property.
049 */
050 private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) {
051
052 private static final long serialVersionUID = 1L;
053 final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
054
055 @Override
056 protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
057 return this.size() > maxSize;
058 }
059 });
060
061 /** Creates a new instance of Escape */
062 public Escape() {
063 }
064
065 public static String escape(String text, EncodingCharacters encChars) {
066 EncLookup esc = getEscapeSequences(encChars);
067 int textLength = text.length();
068
069 StringBuilder result = new StringBuilder(textLength);
070 for (int i = 0; i < textLength; i++) {
071 boolean charReplaced = false;
072 char c = text.charAt(i);
073
074 FORENCCHARS:
075 for (int j = 0; j < 6; j++) {
076 if (text.charAt(i) == esc.characters[j]) {
077
078 // Formatting escape sequences such as /.br/ should be left alone
079 if (j == 4) {
080
081 if (i+1 < textLength) {
082
083 // Check for \.br\
084 char nextChar = text.charAt(i + 1);
085 switch (nextChar) {
086 case '.':
087 case 'C':
088 case 'M':
089 case 'X':
090 case 'Z':
091 {
092 int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
093 if (nextEscapeIndex > 0) {
094 result.append(text.substring(i, nextEscapeIndex + 1));
095 charReplaced = true;
096 i = nextEscapeIndex;
097 break FORENCCHARS;
098 }
099 break;
100 }
101 case 'H':
102 case 'N':
103 {
104 if (i+2 < textLength && text.charAt(i+2) == '\\') {
105 int nextEscapeIndex = i + 2;
106 if (nextEscapeIndex > 0) {
107 result.append(text.substring(i, nextEscapeIndex + 1));
108 charReplaced = true;
109 i = nextEscapeIndex;
110 break FORENCCHARS;
111 }
112 }
113 break;
114 }
115 }
116
117 }
118
119 }
120
121 result.append(esc.encodings[j]);
122 charReplaced = true;
123 break;
124 }
125 }
126 if (!charReplaced) {
127 result.append(c);
128 }
129 }
130 return result.toString();
131 }
132
133 public static String unescape(String text, EncodingCharacters encChars) {
134
135 // If the escape char isn't found, we don't need to look for escape sequences
136 char escapeChar = encChars.getEscapeCharacter();
137 boolean foundEscapeChar = false;
138 for (int i = 0; i < text.length(); i++) {
139 if (text.charAt(i) == escapeChar) {
140 foundEscapeChar = true;
141 break;
142 }
143 }
144 if (foundEscapeChar == false) {
145 return text;
146 }
147
148 int textLength = text.length();
149 StringBuilder result = new StringBuilder(textLength + 20);
150 EncLookup esc = getEscapeSequences(encChars);
151 char escape = esc.characters[4];
152 int encodingsCount = esc.characters.length;
153 int i = 0;
154 while (i < textLength) {
155 char c = text.charAt(i);
156 if (c != escape) {
157 result.append(c);
158 i++;
159 } else {
160 boolean foundEncoding = false;
161
162 // Test against the standard encodings
163 for (int j = 0; j < encodingsCount; j++) {
164 String encoding = esc.encodings[j];
165 int encodingLength = encoding.length();
166 if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
167 .equals(encoding)) {
168 result.append(esc.characters[j]);
169 i += encodingLength;
170 foundEncoding = true;
171 break;
172 }
173 }
174
175 if (!foundEncoding) {
176
177 // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
178 // formatting codes. They should be left intact
179 if (i + 1 < textLength) {
180 char nextChar = text.charAt(i + 1);
181 switch (nextChar) {
182 case '.':
183 case 'C':
184 case 'M':
185 case 'X':
186 case 'Z':
187 {
188 int closingEscape = text.indexOf(escape, i + 1);
189 if (closingEscape > 0) {
190 String substring = text.substring(i, closingEscape + 1);
191 result.append(substring);
192 i += substring.length();
193 } else {
194 i++;
195 }
196 break;
197 }
198 case 'H':
199 case 'N':
200 {
201 int closingEscape = text.indexOf(escape, i + 1);
202 if (closingEscape == i + 2) {
203 String substring = text.substring(i, closingEscape + 1);
204 result.append(substring);
205 i += substring.length();
206 } else {
207 i++;
208 }
209 break;
210 }
211 default:
212 {
213 i++;
214 }
215 }
216
217 } else {
218 i++;
219 }
220 }
221
222
223 }
224 }
225 return result.toString();
226 }
227
228 /**
229 * Returns a HashTable with escape sequences as keys, and corresponding
230 * Strings as values.
231 */
232 private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
233 EncLookup escapeSequences = variousEncChars.get(encChars);
234 if (escapeSequences == null) {
235 // this means we haven't got the sequences for these encoding
236 // characters yet - let's make them
237 escapeSequences = new EncLookup(encChars);
238 variousEncChars.put(encChars, escapeSequences);
239 }
240 return escapeSequences;
241 }
242
243
244
245
246 /**
247 * A performance-optimized replacement for using when
248 * mapping from HL7 special characters to their respective
249 * encodings
250 *
251 * @author Christian Ohr
252 */
253 private static class EncLookup {
254
255 char[] characters = new char[6];
256 String[] encodings = new String[6];
257
258 EncLookup(EncodingCharacters ec) {
259 characters[0] = ec.getFieldSeparator();
260 characters[1] = ec.getComponentSeparator();
261 characters[2] = ec.getSubcomponentSeparator();
262 characters[3] = ec.getRepetitionSeparator();
263 characters[4] = ec.getEscapeCharacter();
264 characters[5] = '\r';
265 char[] codes = {'F', 'S', 'T', 'R', 'E'};
266 for (int i = 0; i < codes.length; i++) {
267 StringBuffer seq = new StringBuffer();
268 seq.append(ec.getEscapeCharacter());
269 seq.append(codes[i]);
270 seq.append(ec.getEscapeCharacter());
271 encodings[i] = seq.toString();
272 }
273 encodings[5] = "\\X000d\\";
274 }
275 }
276 }