001/** 002The contents of this file are subject to the Mozilla Public License Version 1.1 003(the "License"); you may not use this file except in compliance with the License. 004You may obtain a copy of the License at http://www.mozilla.org/MPL/ 005Software distributed under the License is distributed on an "AS IS" basis, 006WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 007specific language governing rights and limitations under the License. 008 009The Original Code is "Escape.java". Description: 010"Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules 011defined in section 2.10 of the standard (version 2.4)" 012 013The Initial Developer of the Original Code is University Health Network. Copyright (C) 0142001. All Rights Reserved. 015 016Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 017 018Alternatively, the contents of this file may be used under the terms of the 019GNU General Public License (the �GPL�), in which case the provisions of the GPL are 020applicable instead of those above. If you wish to allow use of your version of this 021file only under the terms of the GPL and not to allow others to use your version 022of this file under the MPL, indicate your decision by deleting the provisions above 023and replace them with the notice and other provisions required by the GPL License. 024If you do not delete the provisions above, a recipient may use your version of 025this file under either the MPL or the GPL. 026 */ 027package ca.uhn.hl7v2.parser; 028 029import java.util.Collections; 030import java.util.LinkedHashMap; 031import java.util.Map; 032 033/** 034 * Handles "escaping" and "unescaping" of text according to the HL7 escape 035 * sequence rules defined in section 2.10 of the standard (version 2.4). 036 * Currently, escape sequences for multiple character sets are unsupported. The 037 * highlighting, hexademical, and locally defined escape sequences are also 038 * unsupported. 039 * 040 * @author Bryan Tripp 041 * @author Mark Lee (Skeva Technologies) 042 * @author Elmar Hinz 043 * @author Christian Ohr 044 */ 045public class Escape { 046 047 /** 048 * limits the size of variousEncChars to 1000, can be overridden by system property. 049 */ 050 private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) { 051 052 private static final long serialVersionUID = 1L; 053 final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000")); 054 055 @Override 056 protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) { 057 return this.size() > maxSize; 058 } 059 }); 060 061 /** Creates a new instance of Escape */ 062 public Escape() { 063 } 064 065 public static String escape(String text, EncodingCharacters encChars) { 066 EncLookup esc = getEscapeSequences(encChars); 067 int textLength = text.length(); 068 069 StringBuilder result = new StringBuilder(textLength); 070 for (int i = 0; i < textLength; i++) { 071 boolean charReplaced = false; 072 char c = text.charAt(i); 073 074 FORENCCHARS: 075 for (int j = 0; j < 6; j++) { 076 if (text.charAt(i) == esc.characters[j]) { 077 078 // Formatting escape sequences such as /.br/ should be left alone 079 if (j == 4) { 080 081 if (i+1 < textLength) { 082 083 // Check for \.br\ 084 char nextChar = text.charAt(i + 1); 085 switch (nextChar) { 086 case '.': 087 case 'C': 088 case 'M': 089 case 'X': 090 case 'Z': 091 { 092 int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1); 093 if (nextEscapeIndex > 0) { 094 result.append(text.substring(i, nextEscapeIndex + 1)); 095 charReplaced = true; 096 i = nextEscapeIndex; 097 break FORENCCHARS; 098 } 099 break; 100 } 101 case 'H': 102 case 'N': 103 { 104 if (i+2 < textLength && text.charAt(i+2) == '\\') { 105 int nextEscapeIndex = i + 2; 106 if (nextEscapeIndex > 0) { 107 result.append(text.substring(i, nextEscapeIndex + 1)); 108 charReplaced = true; 109 i = nextEscapeIndex; 110 break FORENCCHARS; 111 } 112 } 113 break; 114 } 115 } 116 117 } 118 119 } 120 121 result.append(esc.encodings[j]); 122 charReplaced = true; 123 break; 124 } 125 } 126 if (!charReplaced) { 127 result.append(c); 128 } 129 } 130 return result.toString(); 131 } 132 133 public static String unescape(String text, EncodingCharacters encChars) { 134 135 // If the escape char isn't found, we don't need to look for escape sequences 136 char escapeChar = encChars.getEscapeCharacter(); 137 boolean foundEscapeChar = false; 138 for (int i = 0; i < text.length(); i++) { 139 if (text.charAt(i) == escapeChar) { 140 foundEscapeChar = true; 141 break; 142 } 143 } 144 if (foundEscapeChar == false) { 145 return text; 146 } 147 148 int textLength = text.length(); 149 StringBuilder result = new StringBuilder(textLength + 20); 150 EncLookup esc = getEscapeSequences(encChars); 151 char escape = esc.characters[4]; 152 int encodingsCount = esc.characters.length; 153 int i = 0; 154 while (i < textLength) { 155 char c = text.charAt(i); 156 if (c != escape) { 157 result.append(c); 158 i++; 159 } else { 160 boolean foundEncoding = false; 161 162 // Test against the standard encodings 163 for (int j = 0; j < encodingsCount; j++) { 164 String encoding = esc.encodings[j]; 165 int encodingLength = encoding.length(); 166 if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength) 167 .equals(encoding)) { 168 result.append(esc.characters[j]); 169 i += encodingLength; 170 foundEncoding = true; 171 break; 172 } 173 } 174 175 if (!foundEncoding) { 176 177 // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are 178 // formatting codes. They should be left intact 179 if (i + 1 < textLength) { 180 char nextChar = text.charAt(i + 1); 181 switch (nextChar) { 182 case '.': 183 case 'C': 184 case 'M': 185 case 'X': 186 case 'Z': 187 { 188 int closingEscape = text.indexOf(escape, i + 1); 189 if (closingEscape > 0) { 190 String substring = text.substring(i, closingEscape + 1); 191 result.append(substring); 192 i += substring.length(); 193 } else { 194 i++; 195 } 196 break; 197 } 198 case 'H': 199 case 'N': 200 { 201 int closingEscape = text.indexOf(escape, i + 1); 202 if (closingEscape == i + 2) { 203 String substring = text.substring(i, closingEscape + 1); 204 result.append(substring); 205 i += substring.length(); 206 } else { 207 i++; 208 } 209 break; 210 } 211 default: 212 { 213 i++; 214 } 215 } 216 217 } else { 218 i++; 219 } 220 } 221 222 223 } 224 } 225 return result.toString(); 226 } 227 228 /** 229 * Returns a HashTable with escape sequences as keys, and corresponding 230 * Strings as values. 231 */ 232 private static EncLookup getEscapeSequences(EncodingCharacters encChars) { 233 EncLookup escapeSequences = variousEncChars.get(encChars); 234 if (escapeSequences == null) { 235 // this means we haven't got the sequences for these encoding 236 // characters yet - let's make them 237 escapeSequences = new EncLookup(encChars); 238 variousEncChars.put(encChars, escapeSequences); 239 } 240 return escapeSequences; 241 } 242 243 244 245 246 /** 247 * A performance-optimized replacement for using when 248 * mapping from HL7 special characters to their respective 249 * encodings 250 * 251 * @author Christian Ohr 252 */ 253 private static class EncLookup { 254 255 char[] characters = new char[6]; 256 String[] encodings = new String[6]; 257 258 EncLookup(EncodingCharacters ec) { 259 characters[0] = ec.getFieldSeparator(); 260 characters[1] = ec.getComponentSeparator(); 261 characters[2] = ec.getSubcomponentSeparator(); 262 characters[3] = ec.getRepetitionSeparator(); 263 characters[4] = ec.getEscapeCharacter(); 264 characters[5] = '\r'; 265 char[] codes = {'F', 'S', 'T', 'R', 'E'}; 266 for (int i = 0; i < codes.length; i++) { 267 StringBuffer seq = new StringBuffer(); 268 seq.append(ec.getEscapeCharacter()); 269 seq.append(codes[i]); 270 seq.append(ec.getEscapeCharacter()); 271 encodings[i] = seq.toString(); 272 } 273 encodings[5] = "\\X000d\\"; 274 } 275 } 276}