001 /** 002 The contents of this file are subject to the Mozilla Public License Version 1.1 003 (the "License"); you may not use this file except in compliance with the License. 004 You may obtain a copy of the License at http://www.mozilla.org/MPL/ 005 Software distributed under the License is distributed on an "AS IS" basis, 006 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 007 specific language governing rights and limitations under the License. 008 009 The Original Code is "Escape.java". Description: 010 "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules 011 defined in section 2.10 of the standard (version 2.4)" 012 013 The Initial Developer of the Original Code is University Health Network. Copyright (C) 014 2001. All Rights Reserved. 015 016 Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 017 018 Alternatively, the contents of this file may be used under the terms of the 019 GNU General Public License (the ???GPL???), in which case the provisions of the GPL are 020 applicable instead of those above. If you wish to allow use of your version of this 021 file only under the terms of the GPL and not to allow others to use your version 022 of this file under the MPL, indicate your decision by deleting the provisions above 023 and replace them with the notice and other provisions required by the GPL License. 024 If you do not delete the provisions above, a recipient may use your version of 025 this file under either the MPL or the GPL. 026 */ 027 package ca.uhn.hl7v2.parser; 028 029 import java.util.Collections; 030 import java.util.LinkedHashMap; 031 import java.util.Map; 032 033 /** 034 * Handles "escaping" and "unescaping" of text according to the HL7 escape 035 * sequence rules defined in section 2.10 of the standard (version 2.4). 036 * Currently, escape sequences for multiple character sets are unsupported. The 037 * highlighting, hexademical, and locally defined escape sequences are also 038 * unsupported. 039 * 040 * @author Bryan Tripp 041 * @author Mark Lee (Skeva Technologies) 042 * @author Elmar Hinz 043 * @author Christian Ohr 044 */ 045 public class Escape { 046 047 /** 048 * limits the size of variousEncChars to 1000, can be overridden by system property. 049 */ 050 private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) { 051 052 private static final long serialVersionUID = 1L; 053 final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000")); 054 055 @Override 056 protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) { 057 return this.size() > maxSize; 058 } 059 }); 060 061 /** Creates a new instance of Escape */ 062 public Escape() { 063 } 064 065 public static String escape(String text, EncodingCharacters encChars) { 066 EncLookup esc = getEscapeSequences(encChars); 067 int textLength = text.length(); 068 069 StringBuilder result = new StringBuilder(textLength); 070 for (int i = 0; i < textLength; i++) { 071 boolean charReplaced = false; 072 char c = text.charAt(i); 073 074 for (int j = 0; j < 6; j++) { 075 if (text.charAt(i) == esc.characters[j]) { 076 077 // Formatting escape sequences such as /.br/ should be left alone 078 if (j == 4) { 079 if (i + 1 < textLength) { 080 if (text.charAt(i + 1) == '.') { 081 int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1); 082 if (nextEscapeIndex > 0) { 083 result.append(text.substring(i, nextEscapeIndex + 1)); 084 charReplaced = true; 085 i = nextEscapeIndex; 086 break; 087 } 088 } 089 } 090 } 091 092 result.append(esc.encodings[j]); 093 charReplaced = true; 094 break; 095 } 096 } 097 if (!charReplaced) { 098 result.append(c); 099 } 100 } 101 return result.toString(); 102 } 103 104 public static String unescape(String text, EncodingCharacters encChars) { 105 106 // If the escape char isn't found, we don't need to look for escape sequences 107 char escapeChar = encChars.getEscapeCharacter(); 108 boolean foundEscapeChar = false; 109 for (int i = 0; i < text.length(); i++) { 110 if (text.charAt(i) == escapeChar) { 111 foundEscapeChar = true; 112 break; 113 } 114 } 115 if (foundEscapeChar == false) { 116 return text; 117 } 118 119 int textLength = text.length(); 120 StringBuilder result = new StringBuilder(textLength + 20); 121 EncLookup esc = getEscapeSequences(encChars); 122 char escape = esc.characters[4]; 123 int encodingsCount = esc.characters.length; 124 int i = 0; 125 while (i < textLength) { 126 char c = text.charAt(i); 127 if (c != escape) { 128 result.append(c); 129 i++; 130 } else { 131 boolean foundEncoding = false; 132 133 // Test against the standard encodings 134 for (int j = 0; j < encodingsCount; j++) { 135 String encoding = esc.encodings[j]; 136 int encodingLength = encoding.length(); 137 if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength) 138 .equals(encoding)) { 139 result.append(esc.characters[j]); 140 i += encodingLength; 141 foundEncoding = true; 142 break; 143 } 144 } 145 146 if (!foundEncoding) { 147 148 // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are 149 // formatting codes. They should be left intact 150 if ((i + 1 < textLength) && text.charAt(i + 1) == '.') { 151 int closingEscape = text.indexOf(escape, i + 1); 152 if (closingEscape > 0) { 153 String substring = text.substring(i, closingEscape + 1); 154 result.append(substring); 155 i += substring.length(); 156 } else { 157 i++; 158 } 159 } else { 160 i++; 161 } 162 } 163 164 165 } 166 } 167 return result.toString(); 168 } 169 170 /** 171 * Returns a HashTable with escape sequences as keys, and corresponding 172 * Strings as values. 173 */ 174 private static EncLookup getEscapeSequences(EncodingCharacters encChars) { 175 EncLookup escapeSequences = variousEncChars.get(encChars); 176 if (escapeSequences == null) { 177 // this means we haven't got the sequences for these encoding 178 // characters yet - let's make them 179 escapeSequences = new EncLookup(encChars); 180 variousEncChars.put(encChars, escapeSequences); 181 } 182 return escapeSequences; 183 } 184 185 186 187 188 /** 189 * A performance-optimized replacement for using when 190 * mapping from HL7 special characters to their respective 191 * encodings 192 * 193 * @author Christian Ohr 194 */ 195 private static class EncLookup { 196 197 char[] characters = new char[6]; 198 String[] encodings = new String[6]; 199 200 EncLookup(EncodingCharacters ec) { 201 characters[0] = ec.getFieldSeparator(); 202 characters[1] = ec.getComponentSeparator(); 203 characters[2] = ec.getSubcomponentSeparator(); 204 characters[3] = ec.getRepetitionSeparator(); 205 characters[4] = ec.getEscapeCharacter(); 206 characters[5] = '\r'; 207 char[] codes = {'F', 'S', 'T', 'R', 'E'}; 208 for (int i = 0; i < codes.length; i++) { 209 StringBuffer seq = new StringBuffer(); 210 seq.append(ec.getEscapeCharacter()); 211 seq.append(codes[i]); 212 seq.append(ec.getEscapeCharacter()); 213 encodings[i] = seq.toString(); 214 } 215 encodings[5] = "\\X000d\\"; 216 } 217 } 218 }