001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.csv; 018 019 import java.io.IOException; 020 import java.io.OutputStream; 021 import java.io.PrintWriter; 022 import java.io.Writer; 023 024 /** 025 * Print values as a comma separated list. 026 */ 027 public class CSVPrinter { 028 029 /** 030 * The place that the values get written. 031 */ 032 protected final Writer out; 033 protected final CSVStrategy strategy; 034 035 /** 036 * True if we just began a new line. 037 */ 038 protected boolean newLine = true; 039 040 protected char[] buf = new char[0]; // temporary buffer 041 042 /** 043 * Create a printer that will print values to the given 044 * stream following the CSVStrategy. 045 * <p/> 046 * Currently, only a pure encapsulation strategy or a pure escaping strategy 047 * is supported. Hybrid strategies (encapsulation and escaping with a different character) are not supported. 048 * 049 * @param out stream to which to print. 050 * @param strategy describes the CSV variation. 051 */ 052 public CSVPrinter(Writer out, CSVStrategy strategy) { 053 this.out = out; 054 this.strategy = strategy == null ? CSVStrategy.DEFAULT_STRATEGY : strategy; 055 } 056 057 // ====================================================== 058 // printing implementation 059 // ====================================================== 060 061 /** 062 * Output a blank line 063 */ 064 public void println() throws IOException { 065 out.write(strategy.getPrinterNewline()); 066 newLine = true; 067 } 068 069 public void flush() throws IOException { 070 out.flush(); 071 } 072 073 074 /** 075 * Print a single line of comma separated values. 076 * The values will be quoted if needed. Quotes and 077 * newLine characters will be escaped. 078 * 079 * @param values values to be outputted. 080 */ 081 public void println(String[] values) throws IOException { 082 for (int i = 0; i < values.length; i++) { 083 print(values[i]); 084 } 085 println(); 086 } 087 088 089 /** 090 * Put a comment among the comma separated values. 091 * Comments will always begin on a new line and occupy a 092 * least one full line. The character specified to star 093 * comments and a space will be inserted at the beginning of 094 * each new line in the comment. 095 * 096 * @param comment the comment to output 097 */ 098 public void printlnComment(String comment) throws IOException { 099 if (this.strategy.isCommentingDisabled()) { 100 return; 101 } 102 if (!newLine) { 103 println(); 104 } 105 out.write(this.strategy.getCommentStart()); 106 out.write(' '); 107 for (int i = 0; i < comment.length(); i++) { 108 char c = comment.charAt(i); 109 switch (c) { 110 case '\r': 111 if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') { 112 i++; 113 } 114 // break intentionally excluded. 115 case '\n': 116 println(); 117 out.write(this.strategy.getCommentStart()); 118 out.write(' '); 119 break; 120 default: 121 out.write(c); 122 break; 123 } 124 } 125 println(); 126 } 127 128 129 public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException { 130 if (!checkForEscape) { 131 printSep(); 132 out.write(value, offset, len); 133 return; 134 } 135 136 if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) { 137 printAndEncapsulate(value, offset, len); 138 } else if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) { 139 printAndEscape(value, offset, len); 140 } else { 141 printSep(); 142 out.write(value, offset, len); 143 } 144 } 145 146 void printSep() throws IOException { 147 if (newLine) { 148 newLine = false; 149 } else { 150 out.write(this.strategy.getDelimiter()); 151 } 152 } 153 154 void printAndEscape(char[] value, int offset, int len) throws IOException { 155 int start = offset; 156 int pos = offset; 157 int end = offset + len; 158 159 printSep(); 160 161 char delim = this.strategy.getDelimiter(); 162 char escape = this.strategy.getEscape(); 163 164 while (pos < end) { 165 char c = value[pos]; 166 if (c == '\r' || c == '\n' || c == delim || c == escape) { 167 // write out segment up until this char 168 int l = pos - start; 169 if (l > 0) { 170 out.write(value, start, l); 171 } 172 if (c == '\n') { 173 c = 'n'; 174 } else if (c == '\r') { 175 c = 'r'; 176 } 177 178 out.write(escape); 179 out.write(c); 180 181 start = pos + 1; // start on the current char after this one 182 } 183 184 pos++; 185 } 186 187 // write last segment 188 int l = pos - start; 189 if (l > 0) { 190 out.write(value, start, l); 191 } 192 } 193 194 void printAndEncapsulate(char[] value, int offset, int len) throws IOException { 195 boolean first = newLine; // is this the first value on this line? 196 boolean quote = false; 197 int start = offset; 198 int pos = offset; 199 int end = offset + len; 200 201 printSep(); 202 203 char delim = this.strategy.getDelimiter(); 204 char encapsulator = this.strategy.getEncapsulator(); 205 206 if (len <= 0) { 207 // always quote an empty token that is the first 208 // on the line, as it may be the only thing on the 209 // line. If it were not quoted in that case, 210 // an empty line has no tokens. 211 if (first) { 212 quote = true; 213 } 214 } else { 215 char c = value[pos]; 216 217 // Hmmm, where did this rule come from? 218 if (first 219 && (c < '0' 220 || (c > '9' && c < 'A') 221 || (c > 'Z' && c < 'a') 222 || (c > 'z'))) { 223 quote = true; 224 // } else if (c == ' ' || c == '\f' || c == '\t') { 225 } else if (c <= '#') { 226 // Some other chars at the start of a value caused the parser to fail, so for now 227 // encapsulate if we start in anything less than '#'. We are being conservative 228 // by including the default comment char too. 229 quote = true; 230 } else { 231 while (pos < end) { 232 c = value[pos]; 233 if (c == '\n' || c == '\r' || c == encapsulator || c == delim) { 234 quote = true; 235 break; 236 } 237 pos++; 238 } 239 240 if (!quote) { 241 pos = end - 1; 242 c = value[pos]; 243 // if (c == ' ' || c == '\f' || c == '\t') { 244 // Some other chars at the end caused the parser to fail, so for now 245 // encapsulate if we end in anything less than ' ' 246 if (c <= ' ') { 247 quote = true; 248 } 249 } 250 } 251 } 252 253 if (!quote) { 254 // no encapsulation needed - write out the original value 255 out.write(value, offset, len); 256 return; 257 } 258 259 // we hit something that needed encapsulation 260 out.write(encapsulator); 261 262 // Pick up where we left off: pos should be positioned on the first character that caused 263 // the need for encapsulation. 264 while (pos < end) { 265 char c = value[pos]; 266 if (c == encapsulator) { 267 // write out the chunk up until this point 268 269 // add 1 to the length to write out the encapsulator also 270 out.write(value, start, pos - start + 1); 271 // put the next starting position on the encapsulator so we will 272 // write it out again with the next string (effectively doubling it) 273 start = pos; 274 } 275 pos++; 276 } 277 278 // write the last segment 279 out.write(value, start, pos - start); 280 out.write(encapsulator); 281 } 282 283 /** 284 * Print the string as the next value on the line. The value 285 * will be escaped or encapsulated as needed if checkForEscape==true 286 * 287 * @param value value to be outputted. 288 */ 289 public void print(String value, boolean checkForEscape) throws IOException { 290 if (!checkForEscape) { 291 // write directly from string 292 printSep(); 293 out.write(value); 294 return; 295 } 296 297 if (buf.length < value.length()) { 298 buf = new char[value.length()]; 299 } 300 301 value.getChars(0, value.length(), buf, 0); 302 print(buf, 0, value.length(), checkForEscape); 303 } 304 305 /** 306 * Print the string as the next value on the line. The value 307 * will be escaped or encapsulated as needed. 308 * 309 * @param value value to be outputted. 310 */ 311 public void print(String value) throws IOException { 312 print(value, true); 313 } 314 }