001    /**
002    The contents of this file are subject to the Mozilla Public License Version 1.1 
003    (the "License"); you may not use this file except in compliance with the License. 
004    You may obtain a copy of the License at http://www.mozilla.org/MPL/ 
005    Software distributed under the License is distributed on an "AS IS" basis, 
006    WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 
007    specific language governing rights and limitations under the License. 
008    
009    The Original Code is "Escape.java".  Description: 
010    "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
011    defined in section 2.10 of the standard (version 2.4)" 
012    
013    The Initial Developer of the Original Code is University Health Network. Copyright (C) 
014    2001.  All Rights Reserved. 
015    
016    Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 
017    
018    Alternatively, the contents of this file may be used under the terms of the 
019    GNU General Public License (the "GPL"), in which case the provisions of the GPL are 
020    applicable instead of those above.  If you wish to allow use of your version of this 
021    file only under the terms of the GPL and not to allow others to use your version 
022    of this file under the MPL, indicate your decision by deleting  the provisions above 
023    and replace  them with the notice and other provisions required by the GPL License.  
024    If you do not delete the provisions above, a recipient may use your version of 
025    this file under either the MPL or the GPL. 
026     */
027    package ca.uhn.hl7v2.parser;
028    
029    import java.util.Collections;
030    import java.util.LinkedHashMap;
031    import java.util.Map;
032    
033    /**
034     * Handles "escaping" and "unescaping" of text according to the HL7 escape
035     * sequence rules defined in section 2.10 of the standard (version 2.4).
036     * Currently, escape sequences for multiple character sets are unsupported. The
037     * highlighting, hexademical, and locally defined escape sequences are also
038     * unsupported.
039     * 
040     * @author Bryan Tripp
041     * @author Mark Lee (Skeva Technologies)
042     * @author Elmar Hinz
043     * @author Christian Ohr
044     */
045    public class Escape {
046    
047        /**
048         * limits the size of variousEncChars to 1000, can be overridden by system property.
049         */
050        private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) {
051    
052            private static final long serialVersionUID = 1L;
053            final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
054    
055            @Override
056            protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
057                return this.size() > maxSize;
058            }
059        });
060    
061        /** Creates a new instance of Escape */
062        public Escape() {
063        }
064    
065        public static String escape(String text, EncodingCharacters encChars) {
066            EncLookup esc = getEscapeSequences(encChars);
067            int textLength = text.length();
068    
069            StringBuilder result = new StringBuilder(textLength);
070            for (int i = 0; i < textLength; i++) {
071                boolean charReplaced = false;
072                char c = text.charAt(i);
073    
074                            for (int j = 0; j < 6; j++) {
075                    if (text.charAt(i) == esc.characters[j]) {
076    
077                                            // Formatting escape sequences such as /.br/ should be left alone
078                                            if (j == 4) {
079                                                    if (i + 1 < textLength) {
080                                                            if (text.charAt(i + 1) == '.') {
081                                                                    int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
082                                                                    if (nextEscapeIndex > 0) {
083                                                                            result.append(text.substring(i, nextEscapeIndex + 1));
084                                                                            charReplaced = true;
085                                                                            i = nextEscapeIndex;
086                                                                            break;
087                                                                    }
088                                                            }
089                                                    }
090                                            }
091    
092                        result.append(esc.encodings[j]);
093                        charReplaced = true;
094                        break;
095                    }
096                }
097                if (!charReplaced) {
098                    result.append(c);
099                }
100            }
101            return result.toString();
102        }
103    
104        public static String unescape(String text, EncodingCharacters encChars) {
105    
106            // If the escape char isn't found, we don't need to look for escape sequences
107            char escapeChar = encChars.getEscapeCharacter();
108            boolean foundEscapeChar = false;
109            for (int i = 0; i < text.length(); i++) {
110                if (text.charAt(i) == escapeChar) {
111                    foundEscapeChar = true;
112                    break;
113                }
114            }
115            if (foundEscapeChar == false) {
116                return text;
117            }
118    
119            int textLength = text.length();
120            StringBuilder result = new StringBuilder(textLength + 20);
121            EncLookup esc = getEscapeSequences(encChars);
122            char escape = esc.characters[4];
123            int encodingsCount = esc.characters.length;
124            int i = 0;
125            while (i < textLength) {
126                char c = text.charAt(i);
127                if (c != escape) {
128                    result.append(c);
129                    i++;
130                } else {
131                    boolean foundEncoding = false;
132    
133                                    // Test against the standard encodings
134                                    for (int j = 0; j < encodingsCount; j++) {
135                        String encoding = esc.encodings[j];
136                                            int encodingLength = encoding.length();
137                                            if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
138                                .equals(encoding)) {
139                            result.append(esc.characters[j]);
140                            i += encodingLength;
141                            foundEncoding = true;
142                            break;
143                        }
144                    }
145    
146                    if (!foundEncoding) {
147                                            
148                                            // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
149                                            // formatting codes. They should be left intact
150                                            if ((i + 1 < textLength) && text.charAt(i + 1) == '.') {
151                                                    int closingEscape = text.indexOf(escape, i + 1);
152                                                    if (closingEscape > 0) {
153                                                            String substring = text.substring(i, closingEscape + 1);
154                                                            result.append(substring);
155                                                            i += substring.length();
156                                                    } else {
157                                                            i++;
158                                                    }
159                                            } else {
160                                                    i++;
161                                            }
162                    }
163    
164    
165                }
166            }
167            return result.toString();
168        }
169    
170        /**
171         * Returns a HashTable with escape sequences as keys, and corresponding
172         * Strings as values.
173         */
174        private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
175            EncLookup escapeSequences = variousEncChars.get(encChars);
176            if (escapeSequences == null) {
177                // this means we haven't got the sequences for these encoding
178                // characters yet - let's make them
179                escapeSequences = new EncLookup(encChars);
180                variousEncChars.put(encChars, escapeSequences);
181            }
182            return escapeSequences;
183        }
184    
185    
186    
187    
188        /**
189         * A performance-optimized replacement for using when
190         * mapping from HL7 special characters to their respective
191         * encodings
192         *
193         * @author Christian Ohr
194         */
195        private static class EncLookup {
196    
197            char[] characters = new char[6];
198            String[] encodings = new String[6];
199    
200            EncLookup(EncodingCharacters ec) {
201                characters[0] = ec.getFieldSeparator();
202                characters[1] = ec.getComponentSeparator();
203                characters[2] = ec.getSubcomponentSeparator();
204                characters[3] = ec.getRepetitionSeparator();
205                characters[4] = ec.getEscapeCharacter();
206                characters[5] = '\r';
207                char[] codes = {'F', 'S', 'T', 'R', 'E'};
208                for (int i = 0; i < codes.length; i++) {
209                    StringBuffer seq = new StringBuffer();
210                    seq.append(ec.getEscapeCharacter());
211                    seq.append(codes[i]);
212                    seq.append(ec.getEscapeCharacter());
213                    encodings[i] = seq.toString();
214                }
215                encodings[5] = "\\X000d\\";
216            }
217        }
218    }