001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.lang; 018 019 import java.io.IOException; 020 import java.io.StringWriter; 021 import java.io.Writer; 022 023 import org.apache.commons.lang.exception.NestableRuntimeException; 024 025 /** 026 * <p>Escapes and unescapes <code>String</code>s for 027 * Java, Java Script, HTML, XML, and SQL.</p> 028 * 029 * @author Apache Jakarta Turbine 030 * @author Purple Technology 031 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a> 032 * @author Antony Riley 033 * @author Helge Tesgaard 034 * @author <a href="sean@boohai.com">Sean Brown</a> 035 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a> 036 * @author Phil Steitz 037 * @author Pete Gieser 038 * @since 2.0 039 * @version $Id: StringEscapeUtils.java 612880 2008-01-17 17:34:43Z ggregory $ 040 */ 041 public class StringEscapeUtils { 042 043 private static final char CSV_DELIMITER = ','; 044 private static final char CSV_QUOTE = '"'; 045 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 046 private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 047 048 /** 049 * <p><code>StringEscapeUtils</code> instances should NOT be constructed in 050 * standard programming.</p> 051 * 052 * <p>Instead, the class should be used as: 053 * <pre>StringEscapeUtils.escapeJava("foo");</pre></p> 054 * 055 * <p>This constructor is public to permit tools that require a JavaBean 056 * instance to operate.</p> 057 */ 058 public StringEscapeUtils() { 059 super(); 060 } 061 062 // Java and JavaScript 063 //-------------------------------------------------------------------------- 064 /** 065 * <p>Escapes the characters in a <code>String</code> using Java String rules.</p> 066 * 067 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 068 * 069 * <p>So a tab becomes the characters <code>'\\'</code> and 070 * <code>'t'</code>.</p> 071 * 072 * <p>The only difference between Java strings and JavaScript strings 073 * is that in JavaScript, a single quote must be escaped.</p> 074 * 075 * <p>Example: 076 * <pre> 077 * input string: He didn't say, "Stop!" 078 * output string: He didn't say, \"Stop!\" 079 * </pre> 080 * </p> 081 * 082 * @param str String to escape values in, may be null 083 * @return String with escaped values, <code>null</code> if null string input 084 */ 085 public static String escapeJava(String str) { 086 return escapeJavaStyleString(str, false); 087 } 088 089 /** 090 * <p>Escapes the characters in a <code>String</code> using Java String rules to 091 * a <code>Writer</code>.</p> 092 * 093 * <p>A <code>null</code> string input has no effect.</p> 094 * 095 * @see #escapeJava(java.lang.String) 096 * @param out Writer to write escaped string into 097 * @param str String to escape values in, may be null 098 * @throws IllegalArgumentException if the Writer is <code>null</code> 099 * @throws IOException if error occurs on underlying Writer 100 */ 101 public static void escapeJava(Writer out, String str) throws IOException { 102 escapeJavaStyleString(out, str, false); 103 } 104 105 /** 106 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p> 107 * <p>Escapes any values it finds into their JavaScript String form. 108 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 109 * 110 * <p>So a tab becomes the characters <code>'\\'</code> and 111 * <code>'t'</code>.</p> 112 * 113 * <p>The only difference between Java strings and JavaScript strings 114 * is that in JavaScript, a single quote must be escaped.</p> 115 * 116 * <p>Example: 117 * <pre> 118 * input string: He didn't say, "Stop!" 119 * output string: He didn\'t say, \"Stop!\" 120 * </pre> 121 * </p> 122 * 123 * @param str String to escape values in, may be null 124 * @return String with escaped values, <code>null</code> if null string input 125 */ 126 public static String escapeJavaScript(String str) { 127 return escapeJavaStyleString(str, true); 128 } 129 130 /** 131 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules 132 * to a <code>Writer</code>.</p> 133 * 134 * <p>A <code>null</code> string input has no effect.</p> 135 * 136 * @see #escapeJavaScript(java.lang.String) 137 * @param out Writer to write escaped string into 138 * @param str String to escape values in, may be null 139 * @throws IllegalArgumentException if the Writer is <code>null</code> 140 * @throws IOException if error occurs on underlying Writer 141 **/ 142 public static void escapeJavaScript(Writer out, String str) throws IOException { 143 escapeJavaStyleString(out, str, true); 144 } 145 146 /** 147 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p> 148 * 149 * @param str String to escape values in, may be null 150 * @param escapeSingleQuotes escapes single quotes if <code>true</code> 151 * @return the escaped string 152 */ 153 private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes) { 154 if (str == null) { 155 return null; 156 } 157 try { 158 StringWriter writer = new StringWriter(str.length() * 2); 159 escapeJavaStyleString(writer, str, escapeSingleQuotes); 160 return writer.toString(); 161 } catch (IOException ioe) { 162 // this should never ever happen while writing to a StringWriter 163 ioe.printStackTrace(); 164 return null; 165 } 166 } 167 168 /** 169 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p> 170 * 171 * @param out write to receieve the escaped string 172 * @param str String to escape values in, may be null 173 * @param escapeSingleQuote escapes single quotes if <code>true</code> 174 * @throws IOException if an IOException occurs 175 */ 176 private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote) throws IOException { 177 if (out == null) { 178 throw new IllegalArgumentException("The Writer must not be null"); 179 } 180 if (str == null) { 181 return; 182 } 183 int sz; 184 sz = str.length(); 185 for (int i = 0; i < sz; i++) { 186 char ch = str.charAt(i); 187 188 // handle unicode 189 if (ch > 0xfff) { 190 out.write("\\u" + hex(ch)); 191 } else if (ch > 0xff) { 192 out.write("\\u0" + hex(ch)); 193 } else if (ch > 0x7f) { 194 out.write("\\u00" + hex(ch)); 195 } else if (ch < 32) { 196 switch (ch) { 197 case '\b': 198 out.write('\\'); 199 out.write('b'); 200 break; 201 case '\n': 202 out.write('\\'); 203 out.write('n'); 204 break; 205 case '\t': 206 out.write('\\'); 207 out.write('t'); 208 break; 209 case '\f': 210 out.write('\\'); 211 out.write('f'); 212 break; 213 case '\r': 214 out.write('\\'); 215 out.write('r'); 216 break; 217 default : 218 if (ch > 0xf) { 219 out.write("\\u00" + hex(ch)); 220 } else { 221 out.write("\\u000" + hex(ch)); 222 } 223 break; 224 } 225 } else { 226 switch (ch) { 227 case '\'': 228 if (escapeSingleQuote) { 229 out.write('\\'); 230 } 231 out.write('\''); 232 break; 233 case '"': 234 out.write('\\'); 235 out.write('"'); 236 break; 237 case '\\': 238 out.write('\\'); 239 out.write('\\'); 240 break; 241 case '/': 242 out.write('\\'); 243 out.write('/'); 244 break; 245 default : 246 out.write(ch); 247 break; 248 } 249 } 250 } 251 } 252 253 /** 254 * <p>Returns an upper case hexadecimal <code>String</code> for the given 255 * character.</p> 256 * 257 * @param ch The character to convert. 258 * @return An upper case hexadecimal <code>String</code> 259 */ 260 private static String hex(char ch) { 261 return Integer.toHexString(ch).toUpperCase(); 262 } 263 264 /** 265 * <p>Unescapes any Java literals found in the <code>String</code>. 266 * For example, it will turn a sequence of <code>'\'</code> and 267 * <code>'n'</code> into a newline character, unless the <code>'\'</code> 268 * is preceded by another <code>'\'</code>.</p> 269 * 270 * @param str the <code>String</code> to unescape, may be null 271 * @return a new unescaped <code>String</code>, <code>null</code> if null string input 272 */ 273 public static String unescapeJava(String str) { 274 if (str == null) { 275 return null; 276 } 277 try { 278 StringWriter writer = new StringWriter(str.length()); 279 unescapeJava(writer, str); 280 return writer.toString(); 281 } catch (IOException ioe) { 282 // this should never ever happen while writing to a StringWriter 283 ioe.printStackTrace(); 284 return null; 285 } 286 } 287 288 /** 289 * <p>Unescapes any Java literals found in the <code>String</code> to a 290 * <code>Writer</code>.</p> 291 * 292 * <p>For example, it will turn a sequence of <code>'\'</code> and 293 * <code>'n'</code> into a newline character, unless the <code>'\'</code> 294 * is preceded by another <code>'\'</code>.</p> 295 * 296 * <p>A <code>null</code> string input has no effect.</p> 297 * 298 * @param out the <code>Writer</code> used to output unescaped characters 299 * @param str the <code>String</code> to unescape, may be null 300 * @throws IllegalArgumentException if the Writer is <code>null</code> 301 * @throws IOException if error occurs on underlying Writer 302 */ 303 public static void unescapeJava(Writer out, String str) throws IOException { 304 if (out == null) { 305 throw new IllegalArgumentException("The Writer must not be null"); 306 } 307 if (str == null) { 308 return; 309 } 310 int sz = str.length(); 311 StringBuffer unicode = new StringBuffer(4); 312 boolean hadSlash = false; 313 boolean inUnicode = false; 314 for (int i = 0; i < sz; i++) { 315 char ch = str.charAt(i); 316 if (inUnicode) { 317 // if in unicode, then we're reading unicode 318 // values in somehow 319 unicode.append(ch); 320 if (unicode.length() == 4) { 321 // unicode now contains the four hex digits 322 // which represents our unicode character 323 try { 324 int value = Integer.parseInt(unicode.toString(), 16); 325 out.write((char) value); 326 unicode.setLength(0); 327 inUnicode = false; 328 hadSlash = false; 329 } catch (NumberFormatException nfe) { 330 throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe); 331 } 332 } 333 continue; 334 } 335 if (hadSlash) { 336 // handle an escaped value 337 hadSlash = false; 338 switch (ch) { 339 case '\\': 340 out.write('\\'); 341 break; 342 case '\'': 343 out.write('\''); 344 break; 345 case '\"': 346 out.write('"'); 347 break; 348 case 'r': 349 out.write('\r'); 350 break; 351 case 'f': 352 out.write('\f'); 353 break; 354 case 't': 355 out.write('\t'); 356 break; 357 case 'n': 358 out.write('\n'); 359 break; 360 case 'b': 361 out.write('\b'); 362 break; 363 case 'u': 364 { 365 // uh-oh, we're in unicode country.... 366 inUnicode = true; 367 break; 368 } 369 default : 370 out.write(ch); 371 break; 372 } 373 continue; 374 } else if (ch == '\\') { 375 hadSlash = true; 376 continue; 377 } 378 out.write(ch); 379 } 380 if (hadSlash) { 381 // then we're in the weird case of a \ at the end of the 382 // string, let's output it anyway. 383 out.write('\\'); 384 } 385 } 386 387 /** 388 * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p> 389 * 390 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code> 391 * into a newline character, unless the <code>'\'</code> is preceded by another 392 * <code>'\'</code>.</p> 393 * 394 * @see #unescapeJava(String) 395 * @param str the <code>String</code> to unescape, may be null 396 * @return A new unescaped <code>String</code>, <code>null</code> if null string input 397 */ 398 public static String unescapeJavaScript(String str) { 399 return unescapeJava(str); 400 } 401 402 /** 403 * <p>Unescapes any JavaScript literals found in the <code>String</code> to a 404 * <code>Writer</code>.</p> 405 * 406 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code> 407 * into a newline character, unless the <code>'\'</code> is preceded by another 408 * <code>'\'</code>.</p> 409 * 410 * <p>A <code>null</code> string input has no effect.</p> 411 * 412 * @see #unescapeJava(Writer,String) 413 * @param out the <code>Writer</code> used to output unescaped characters 414 * @param str the <code>String</code> to unescape, may be null 415 * @throws IllegalArgumentException if the Writer is <code>null</code> 416 * @throws IOException if error occurs on underlying Writer 417 */ 418 public static void unescapeJavaScript(Writer out, String str) throws IOException { 419 unescapeJava(out, str); 420 } 421 422 // HTML and XML 423 //-------------------------------------------------------------------------- 424 /** 425 * <p>Escapes the characters in a <code>String</code> using HTML entities.</p> 426 * 427 * <p> 428 * For example: 429 * </p> 430 * <p><code>"bread" & "butter"</code></p> 431 * becomes: 432 * <p> 433 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 434 * </p> 435 * 436 * <p>Supports all known HTML 4.0 entities, including funky accents. 437 * Note that the commonly used apostrophe escape character (&apos;) 438 * is not a legal entity and so is not supported). </p> 439 * 440 * @param str the <code>String</code> to escape, may be null 441 * @return a new escaped <code>String</code>, <code>null</code> if null string input 442 * 443 * @see #unescapeHtml(String) 444 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 445 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 446 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 447 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 448 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 449 */ 450 public static String escapeHtml(String str) { 451 if (str == null) { 452 return null; 453 } 454 try { 455 StringWriter writer = new StringWriter ((int)(str.length() * 1.5)); 456 escapeHtml(writer, str); 457 return writer.toString(); 458 } catch (IOException e) { 459 //assert false; 460 //should be impossible 461 e.printStackTrace(); 462 return null; 463 } 464 } 465 466 /** 467 * <p>Escapes the characters in a <code>String</code> using HTML entities and writes 468 * them to a <code>Writer</code>.</p> 469 * 470 * <p> 471 * For example: 472 * </p> 473 * <code>"bread" & "butter"</code> 474 * <p>becomes:</p> 475 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 476 * 477 * <p>Supports all known HTML 4.0 entities, including funky accents. 478 * Note that the commonly used apostrophe escape character (&apos;) 479 * is not a legal entity and so is not supported). </p> 480 * 481 * @param writer the writer receiving the escaped string, not null 482 * @param string the <code>String</code> to escape, may be null 483 * @throws IllegalArgumentException if the writer is null 484 * @throws IOException when <code>Writer</code> passed throws the exception from 485 * calls to the {@link Writer#write(int)} methods. 486 * 487 * @see #escapeHtml(String) 488 * @see #unescapeHtml(String) 489 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 490 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 491 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 492 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 493 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 494 */ 495 public static void escapeHtml(Writer writer, String string) throws IOException { 496 if (writer == null ) { 497 throw new IllegalArgumentException ("The Writer must not be null."); 498 } 499 if (string == null) { 500 return; 501 } 502 Entities.HTML40.escape(writer, string); 503 } 504 505 //----------------------------------------------------------------------- 506 /** 507 * <p>Unescapes a string containing entity escapes to a string 508 * containing the actual Unicode characters corresponding to the 509 * escapes. Supports HTML 4.0 entities.</p> 510 * 511 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;" 512 * will become "<Français>"</p> 513 * 514 * <p>If an entity is unrecognized, it is left alone, and inserted 515 * verbatim into the result string. e.g. "&gt;&zzzz;x" will 516 * become ">&zzzz;x".</p> 517 * 518 * @param str the <code>String</code> to unescape, may be null 519 * @return a new unescaped <code>String</code>, <code>null</code> if null string input 520 * @see #escapeHtml(Writer, String) 521 */ 522 public static String unescapeHtml(String str) { 523 if (str == null) { 524 return null; 525 } 526 try { 527 StringWriter writer = new StringWriter ((int)(str.length() * 1.5)); 528 unescapeHtml(writer, str); 529 return writer.toString(); 530 } catch (IOException e) { 531 //assert false; 532 //should be impossible 533 e.printStackTrace(); 534 return null; 535 } 536 } 537 538 /** 539 * <p>Unescapes a string containing entity escapes to a string 540 * containing the actual Unicode characters corresponding to the 541 * escapes. Supports HTML 4.0 entities.</p> 542 * 543 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;" 544 * will become "<Français>"</p> 545 * 546 * <p>If an entity is unrecognized, it is left alone, and inserted 547 * verbatim into the result string. e.g. "&gt;&zzzz;x" will 548 * become ">&zzzz;x".</p> 549 * 550 * @param writer the writer receiving the unescaped string, not null 551 * @param string the <code>String</code> to unescape, may be null 552 * @throws IllegalArgumentException if the writer is null 553 * @throws IOException if an IOException occurs 554 * @see #escapeHtml(String) 555 */ 556 public static void unescapeHtml(Writer writer, String string) throws IOException { 557 if (writer == null ) { 558 throw new IllegalArgumentException ("The Writer must not be null."); 559 } 560 if (string == null) { 561 return; 562 } 563 Entities.HTML40.unescape(writer, string); 564 } 565 566 //----------------------------------------------------------------------- 567 /** 568 * <p>Escapes the characters in a <code>String</code> using XML entities.</p> 569 * 570 * <p>For example: <tt>"bread" & "butter"</tt> => 571 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>. 572 * </p> 573 * 574 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 575 * Does not support DTDs or external entities.</p> 576 * 577 * <p>Note that unicode characters greater than 0x7f are currently escaped to 578 * their numerical \\u equivalent. This may change in future releases. </p> 579 * 580 * @param writer the writer receiving the unescaped string, not null 581 * @param str the <code>String</code> to escape, may be null 582 * @throws IllegalArgumentException if the writer is null 583 * @throws IOException if there is a problem writing 584 * @see #unescapeXml(java.lang.String) 585 */ 586 public static void escapeXml(Writer writer, String str) throws IOException { 587 if (writer == null ) { 588 throw new IllegalArgumentException ("The Writer must not be null."); 589 } 590 if (str == null) { 591 return; 592 } 593 Entities.XML.escape(writer, str); 594 } 595 596 /** 597 * <p>Escapes the characters in a <code>String</code> using XML entities.</p> 598 * 599 * <p>For example: <tt>"bread" & "butter"</tt> => 600 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>. 601 * </p> 602 * 603 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 604 * Does not support DTDs or external entities.</p> 605 * 606 * <p>Note that unicode characters greater than 0x7f are currently escaped to 607 * their numerical \\u equivalent. This may change in future releases. </p> 608 * 609 * @param str the <code>String</code> to escape, may be null 610 * @return a new escaped <code>String</code>, <code>null</code> if null string input 611 * @see #unescapeXml(java.lang.String) 612 */ 613 public static String escapeXml(String str) { 614 if (str == null) { 615 return null; 616 } 617 return Entities.XML.escape(str); 618 } 619 620 //----------------------------------------------------------------------- 621 /** 622 * <p>Unescapes a string containing XML entity escapes to a string 623 * containing the actual Unicode characters corresponding to the 624 * escapes.</p> 625 * 626 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 627 * Does not support DTDs or external entities.</p> 628 * 629 * <p>Note that numerical \\u unicode codes are unescaped to their respective 630 * unicode characters. This may change in future releases. </p> 631 * 632 * @param writer the writer receiving the unescaped string, not null 633 * @param str the <code>String</code> to unescape, may be null 634 * @throws IllegalArgumentException if the writer is null 635 * @throws IOException if there is a problem writing 636 * @see #escapeXml(String) 637 */ 638 public static void unescapeXml(Writer writer, String str) throws IOException { 639 if (writer == null ) { 640 throw new IllegalArgumentException ("The Writer must not be null."); 641 } 642 if (str == null) { 643 return; 644 } 645 Entities.XML.unescape(writer, str); 646 } 647 648 /** 649 * <p>Unescapes a string containing XML entity escapes to a string 650 * containing the actual Unicode characters corresponding to the 651 * escapes.</p> 652 * 653 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 654 * Does not support DTDs or external entities.</p> 655 * 656 * <p>Note that numerical \\u unicode codes are unescaped to their respective 657 * unicode characters. This may change in future releases. </p> 658 * 659 * @param str the <code>String</code> to unescape, may be null 660 * @return a new unescaped <code>String</code>, <code>null</code> if null string input 661 * @see #escapeXml(String) 662 */ 663 public static String unescapeXml(String str) { 664 if (str == null) { 665 return null; 666 } 667 return Entities.XML.unescape(str); 668 } 669 670 //----------------------------------------------------------------------- 671 /** 672 * <p>Escapes the characters in a <code>String</code> to be suitable to pass to 673 * an SQL query.</p> 674 * 675 * <p>For example, 676 * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" + 677 * StringEscapeUtils.escapeSql("McHale's Navy") + 678 * "'");</pre> 679 * </p> 680 * 681 * <p>At present, this method only turns single-quotes into doubled single-quotes 682 * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not 683 * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p> 684 * 685 * see http://www.jguru.com/faq/view.jsp?EID=8881 686 * @param str the string to escape, may be null 687 * @return a new String, escaped for SQL, <code>null</code> if null string input 688 */ 689 public static String escapeSql(String str) { 690 if (str == null) { 691 return null; 692 } 693 return StringUtils.replace(str, "'", "''"); 694 } 695 696 //----------------------------------------------------------------------- 697 698 /** 699 * <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes, 700 * if required.</p> 701 * 702 * <p>If the value contains a comma, newline or double quote, then the 703 * String value is returned enclosed in double quotes.</p> 704 * </p> 705 * 706 * <p>Any double quote characters in the value are escaped with another double quote.</p> 707 * 708 * <p>If the value does not contain a comma, newline or double quote, then the 709 * String value is returned unchanged.</p> 710 * </p> 711 * 712 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 713 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 714 * 715 * @param str the input CSV column String, may be null 716 * @return the input String, enclosed in double quotes if the value contains a comma, 717 * newline or double quote, <code>null</code> if null string input 718 * @since 2.4 719 */ 720 public static String escapeCsv(String str) { 721 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { 722 return str; 723 } 724 try { 725 StringWriter writer = new StringWriter(); 726 escapeCsv(writer, str); 727 return writer.toString(); 728 } catch (IOException ioe) { 729 // this should never ever happen while writing to a StringWriter 730 ioe.printStackTrace(); 731 return null; 732 } 733 } 734 735 /** 736 * <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes, 737 * if required.</p> 738 * 739 * <p>If the value contains a comma, newline or double quote, then the 740 * String value is written enclosed in double quotes.</p> 741 * </p> 742 * 743 * <p>Any double quote characters in the value are escaped with another double quote.</p> 744 * 745 * <p>If the value does not contain a comma, newline or double quote, then the 746 * String value is written unchanged (null values are ignored).</p> 747 * </p> 748 * 749 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 750 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 751 * 752 * @param str the input CSV column String, may be null 753 * @param out Writer to write input string to, enclosed in double quotes if it contains 754 * a comma, newline or double quote 755 * @throws IOException if error occurs on underlying Writer 756 * @since 2.4 757 */ 758 public static void escapeCsv(Writer out, String str) throws IOException { 759 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { 760 if (str != null) { 761 out.write(str); 762 } 763 return; 764 } 765 out.write(CSV_QUOTE); 766 for (int i = 0; i < str.length(); i++) { 767 char c = str.charAt(i); 768 if (c == CSV_QUOTE) { 769 out.write(CSV_QUOTE); // escape double quote 770 } 771 out.write(c); 772 } 773 out.write(CSV_QUOTE); 774 } 775 776 /** 777 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p> 778 * 779 * <p>If the value is enclosed in double quotes, and contains a comma, newline 780 * or double quote, then quotes are removed. 781 * </p> 782 * 783 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 784 * to just one double quote. </p> 785 * 786 * <p>If the value is not enclosed in double quotes, or is and does not contain a 787 * comma, newline or double quote, then the String value is returned unchanged.</p> 788 * </p> 789 * 790 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 791 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 792 * 793 * @param str the input CSV column String, may be null 794 * @return the input String, with enclosing double quotes removed and embedded double 795 * quotes unescaped, <code>null</code> if null string input 796 * @since 2.4 797 */ 798 public static String unescapeCsv(String str) { 799 if (str == null) { 800 return null; 801 } 802 try { 803 StringWriter writer = new StringWriter(); 804 unescapeCsv(writer, str); 805 return writer.toString(); 806 } catch (IOException ioe) { 807 // this should never ever happen while writing to a StringWriter 808 ioe.printStackTrace(); 809 return null; 810 } 811 } 812 813 /** 814 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p> 815 * 816 * <p>If the value is enclosed in double quotes, and contains a comma, newline 817 * or double quote, then quotes are removed. 818 * </p> 819 * 820 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 821 * to just one double quote. </p> 822 * 823 * <p>If the value is not enclosed in double quotes, or is and does not contain a 824 * comma, newline or double quote, then the String value is returned unchanged.</p> 825 * </p> 826 * 827 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 828 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 829 * 830 * @param str the input CSV column String, may be null 831 * @param out Writer to write the input String to, with enclosing double quotes 832 * removed and embedded double quotes unescaped, <code>null</code> if null string input 833 * @throws IOException if error occurs on underlying Writer 834 * @since 2.4 835 */ 836 public static void unescapeCsv(Writer out, String str) throws IOException { 837 if (str == null) { 838 return; 839 } 840 if (str.length() < 2) { 841 out.write(str); 842 return; 843 } 844 if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE ) { 845 out.write(str); 846 return; 847 } 848 849 // strip quotes 850 String quoteless = str.substring(1, str.length() - 1); 851 852 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { 853 // deal with escaped quotes; ie) "" 854 str = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR); 855 } 856 857 out.write(str); 858 } 859 860 }