1 /* 2 * Copyright 2007 Kasper B. Graversen 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package org.supercsv.prefs; 17 18 import org.supercsv.comment.CommentMatcher; 19 import org.supercsv.encoder.CsvEncoder; 20 import org.supercsv.encoder.DefaultCsvEncoder; 21 import org.supercsv.quote.NormalQuoteMode; 22 import org.supercsv.quote.QuoteMode; 23 24 /** 25 * Before reading or writing CSV files, you must supply the reader/writer with some preferences. 26 * <p> 27 * <strong>Please note:</strong> the end of line symbols are <em>only</em> used for writing. 28 * </p> 29 * <table border="0" cellpadding="1" > 30 * <caption>Predefined configurations</caption> <tbody> 31 * <tr> 32 * <th align="left">Constant</th> 33 * <th align="left">Quote character</th> 34 * <th align="left">Delimiter character</th> 35 * <th align="left">End of line symbols</th> 36 * </tr> 37 * <tr> 38 * <td><code>STANDARD_PREFERENCE</code></td> 39 * <td><code>"</code></td> 40 * <td><code>,</code></td> 41 * <td><code>\r\n</code></td> 42 * </tr> 43 * <tr> 44 * <td><code>EXCEL_PREFERENCE</code></td> 45 * <td><code>"</code></td> 46 * <td><code>,</code></td> 47 * <td><code>\n</code></td> 48 * </tr> 49 * <tr> 50 * <td><code>EXCEL_NORTH_EUROPE_PREFERENCE</code></td> 51 * <td><code>"</code></td> 52 * <td><code>;</code></td> 53 * <td><code>\n</code></td> 54 * </tr> 55 * <tr> 56 * <td><code>TAB_PREFERENCE</code></td> 57 * <td><code>"</code></td> 58 * <td><code>\t</code></td> 59 * <td><code>\n</code></td> 60 * </tr> 61 * </tbody> 62 * </table> 63 * <p> 64 * By default, spaces surrounding an unquoted cell are treated as part of the data. In versions of Super CSV prior to 65 * 2.0.0 this wasn't the case, and any surrounding spaces that weren't within quotes were ignored when reading (and 66 * quotes were automatically added to Strings containing surrounding spaces when writing). 67 * </p> 68 * <p> 69 * If you wish enable this functionality again, then you can create a CsvPreference with the 70 * <tt>surroundingSpacesNeedQuotes</tt> flag set to true (the default is false). This means that surrounding spaces 71 * without quotes will be trimmed when reading, and quotes will automatically be added for Strings containing 72 * surrounding spaces when writing. 73 * </p> 74 * <p> 75 * You can apply the surroundingSpacesNeedQuotes property to an existing preference as follows: 76 * </p> 77 * <p> 78 * {@code private static final CsvPreference STANDARD_SURROUNDING_SPACES_NEED_QUOTES = new CsvPreference.Builder(CsvPreference.STANDARD_PREFERENCE).surroundingSpacesNeedQuotes(true).build();} 79 * </p> 80 * <p> 81 * You can also create your own preferences. For example if your file was pipe-delimited, you could use the following: 82 * </p> 83 * <p> 84 * {@code private static final CsvPreference PIPE_DELIMITED = new CsvPreference.Builder('"', '|', "\n").build();} 85 * </p> 86 * Other preferences incude: 87 * <ul> 88 * <li>using a custom {@link CsvEncoder} when writing CSV (if you want complete control over how the CSV is encoded)</li> 89 * <li>using a custom {@link QuoteMode} when writing CSV (if you want to enable quotes when they're not normally 90 * required). You can use the existing {@link org.supercsv.quote.AlwaysQuoteMode AlwaysQuoteMode}, 91 * {@link org.supercsv.quote.ColumnQuoteMode ColumnQuoteMode}, or supply your own.</li> 92 * <li>using a custom {@link CommentMatcher} to skip comments when reading CSV. You can use the existing 93 * {@link org.supercsv.comment.CommentStartsWith CommentStartsWith}, {@link org.supercsv.comment.CommentMatches 94 * CommentMatches}, or supply your own.</li> 95 * <li>ignoring empty lines (enabled by default)</li> 96 * <li>setting the maximum number of lines a row of CSV can span (useful for debugging files with mismatched quotes)</li> 97 * </ul> 98 * 99 * @author Kasper B. Graversen 100 * @author James Bassett 101 */ 102 public final class CsvPreference { 103 104 /** 105 * Ready to use configuration that should cover 99% of all usages. 106 */ 107 public static final CsvPreference STANDARD_PREFERENCE = new CsvPreference.Builder('"', ',', "\r\n").build(); 108 109 /** 110 * Ready to use configuration for Windows Excel exported CSV files. 111 */ 112 public static final CsvPreference EXCEL_PREFERENCE = new CsvPreference.Builder('"', ',', "\n").build(); 113 114 /** 115 * Ready to use configuration for north European excel CSV files (columns are separated by ";" instead of ",") 116 */ 117 public static final CsvPreference EXCEL_NORTH_EUROPE_PREFERENCE = new CsvPreference.Builder('"', ';', "\n").build(); 118 119 /** 120 * Ready to use configuration for tab-delimited files. 121 */ 122 public static final CsvPreference TAB_PREFERENCE = new CsvPreference.Builder('"', '\t', "\n").build(); 123 124 private final char quoteChar; 125 126 private final int delimiterChar; 127 128 private final String endOfLineSymbols; 129 130 private final boolean surroundingSpacesNeedQuotes; 131 132 private final boolean ignoreEmptyLines; 133 134 private final CsvEncoder encoder; 135 136 private final QuoteMode quoteMode; 137 138 private final CommentMatcher commentMatcher; 139 140 private int maxLinesPerRow = 0; 141 142 /** 143 * Constructs a new <tt>CsvPreference</tt> from a Builder. 144 */ 145 private CsvPreference(Builder builder) { 146 this.quoteChar = builder.quoteChar; 147 this.delimiterChar = builder.delimiterChar; 148 this.endOfLineSymbols = builder.endOfLineSymbols; 149 this.surroundingSpacesNeedQuotes = builder.surroundingSpacesNeedQuotes; 150 this.ignoreEmptyLines = builder.ignoreEmptyLines; 151 this.commentMatcher = builder.commentMatcher; 152 this.encoder = builder.encoder; 153 this.quoteMode = builder.quoteMode; 154 this.maxLinesPerRow = builder.maxLinesPerRow; 155 } 156 157 /** 158 * Returns the delimiter character 159 * 160 * @return the delimiter character 161 */ 162 public int getDelimiterChar() { 163 return delimiterChar; 164 } 165 166 /** 167 * Returns the end of line symbols 168 * 169 * @return the end of line symbols 170 */ 171 public String getEndOfLineSymbols() { 172 return endOfLineSymbols; 173 } 174 175 /** 176 * Returns the quote character 177 * 178 * @return the quote character 179 */ 180 public char getQuoteChar() { 181 return quoteChar; 182 } 183 184 /** 185 * Returns the surroundingSpacesNeedQuotes flag. 186 * 187 * @return the surroundingSpacesNeedQuotes flag 188 */ 189 public boolean isSurroundingSpacesNeedQuotes() { 190 return surroundingSpacesNeedQuotes; 191 } 192 193 /** 194 * Returns the ignoreEmptyLines flag. 195 * 196 * @return the ignoreEmptyLines flag 197 */ 198 public boolean isIgnoreEmptyLines() { 199 return ignoreEmptyLines; 200 } 201 202 /** 203 * Returns the CSV encoder. 204 * 205 * @return the CSV encoder 206 */ 207 public CsvEncoder getEncoder() { 208 return encoder; 209 } 210 211 /** 212 * Returns the quote mode. 213 * 214 * @return the quote mode 215 */ 216 public QuoteMode getQuoteMode() { 217 return quoteMode; 218 } 219 220 /** 221 * Returns the comment matcher. 222 * 223 * @return the comment matcher 224 */ 225 public CommentMatcher getCommentMatcher() { 226 return commentMatcher; 227 } 228 229 /** 230 * Returns the maximum number of lines a row can span. 231 * 232 * @return the maximum number of lines a row can span 233 */ 234 public int getMaxLinesPerRow() { 235 return maxLinesPerRow; 236 } 237 238 /** 239 * Builds immutable <tt>CsvPreference</tt> instances. The builder pattern allows for additional preferences to be 240 * added in the future. 241 */ 242 public static class Builder { 243 244 private final char quoteChar; 245 246 private final int delimiterChar; 247 248 private final String endOfLineSymbols; 249 250 private boolean surroundingSpacesNeedQuotes = false; 251 252 private boolean ignoreEmptyLines = true; 253 254 private CsvEncoder encoder; 255 256 private QuoteMode quoteMode; 257 258 private CommentMatcher commentMatcher; 259 260 private int maxLinesPerRow = 0; 261 262 /** 263 * Constructs a Builder with all of the values from an existing <tt>CsvPreference</tt> instance. Useful if you 264 * want to base your preferences off one of the existing CsvPreference constants. 265 * 266 * @param preference 267 * the existing preference 268 */ 269 public Builder(final CsvPreference preference) { 270 this.quoteChar = preference.quoteChar; 271 this.delimiterChar = preference.delimiterChar; 272 this.endOfLineSymbols = preference.endOfLineSymbols; 273 this.surroundingSpacesNeedQuotes = preference.surroundingSpacesNeedQuotes; 274 this.ignoreEmptyLines = preference.ignoreEmptyLines; 275 this.encoder = preference.encoder; 276 this.quoteMode = preference.quoteMode; 277 this.commentMatcher = preference.commentMatcher; 278 this.maxLinesPerRow = preference.maxLinesPerRow; 279 } 280 281 /** 282 * Constructs a Builder with the mandatory preference values. 283 * 284 * @param quoteChar 285 * matching pairs of this character are used to escape columns containing the delimiter 286 * @param delimiterChar 287 * the character separating each column 288 * @param endOfLineSymbols 289 * one or more symbols terminating the line, e.g. "\n". Only used for writing. 290 * @throws IllegalArgumentException 291 * if quoteChar and delimiterChar are the same character 292 * @throws NullPointerException 293 * if endOfLineSymbols is null 294 */ 295 public Builder(final char quoteChar, final int delimiterChar, final String endOfLineSymbols) { 296 if( quoteChar == delimiterChar ) { 297 throw new IllegalArgumentException(String.format( 298 "quoteChar and delimiterChar should not be the same character: %c", quoteChar)); 299 } else if( endOfLineSymbols == null ) { 300 throw new NullPointerException("endOfLineSymbols should not be null"); 301 } 302 this.quoteChar = quoteChar; 303 this.delimiterChar = delimiterChar; 304 this.endOfLineSymbols = endOfLineSymbols; 305 } 306 307 /** 308 * Flag indicating whether spaces at the beginning or end of a cell should be ignored if they're not surrounded 309 * by quotes (applicable to both reading and writing CSV). The default is <tt>false</tt>, as spaces 310 * "are considered part of a field and should not be ignored" according to RFC 4180. 311 * 312 * @since 2.0.0 313 * @param surroundingSpacesNeedQuotes 314 * flag indicating whether spaces at the beginning or end of a cell should be ignored if they're not 315 * surrounded by quotes 316 * @return the updated Builder 317 */ 318 public Builder surroundingSpacesNeedQuotes(final boolean surroundingSpacesNeedQuotes) { 319 this.surroundingSpacesNeedQuotes = surroundingSpacesNeedQuotes; 320 return this; 321 } 322 323 /** 324 * Flag indicating whether empty lines (i.e. containing only end of line symbols) should be ignored. The default 325 * is <tt>true</tt>. 326 * 327 * @since 2.2.1 328 * @param ignoreEmptyLines 329 * flag indicating whether empty lines should be ignored 330 * @return the updated Builder 331 */ 332 public Builder ignoreEmptyLines(final boolean ignoreEmptyLines) { 333 this.ignoreEmptyLines = ignoreEmptyLines; 334 return this; 335 } 336 337 /** 338 * Enables the skipping of comments. You can supply your own comment matcher or use one of the predefined ones: 339 * {@link org.supercsv.comment.CommentStartsWith CommentStartsWith} or 340 * {@link org.supercsv.comment.CommentMatches CommentMatches} 341 * 342 * @since 2.1.0 343 * @param commentMatcher 344 * the comment matcher to use 345 * @return the updated Builder 346 * @throws NullPointerException 347 * if commentMatcher is null 348 */ 349 public Builder skipComments(final CommentMatcher commentMatcher) { 350 if( commentMatcher == null ) { 351 throw new NullPointerException("commentMatcher should not be null"); 352 } 353 this.commentMatcher = commentMatcher; 354 return this; 355 } 356 357 /** 358 * Uses a custom CsvEncoder to escape CSV for writing. 359 * 360 * @since 2.1.0 361 * @param encoder 362 * the custom encoder 363 * @return the updated Builder 364 * @throws NullPointerException 365 * if encoder is null 366 */ 367 public Builder useEncoder(final CsvEncoder encoder) { 368 if( encoder == null ) { 369 throw new NullPointerException("encoder should not be null"); 370 } 371 this.encoder = encoder; 372 return this; 373 } 374 375 /** 376 * Uses a custom QuoteMode to determine if surrounding quotes should be applied when writing (only applicable if 377 * a column doesn't contain any special characters and wouldn't otherwise be quoted). You can supply your own 378 * quote mode or use one of the predefined ones: {@link org.supercsv.quote.AlwaysQuoteMode AlwaysQuoteMode} or 379 * {@link org.supercsv.quote.ColumnQuoteMode ColumnQuoteMode} 380 * 381 * @since 2.1.0 382 * @param quoteMode 383 * the quote mode 384 * @return the updated Builder 385 * @throws NullPointerException 386 * if quoteMode is null 387 */ 388 public Builder useQuoteMode(final QuoteMode quoteMode) { 389 if( quoteMode == null ) { 390 throw new NullPointerException("quoteMode should not be null"); 391 } 392 this.quoteMode = quoteMode; 393 return this; 394 } 395 396 /** 397 * The maximum number of lines that a row can span before an exception is thrown (only applicable when reading 398 * CSV). This option allows CSV readers to fail fast when encountering CSV with mismatching quotes - the normal 399 * behaviour would be to continue reading until the matching quote is found, which could potentially mean 400 * reading the whole file (and exhausting all available memory). Zero or a negative value will disable this 401 * option. The default is <tt>0</tt>. 402 * 403 * @since 2.4.0 404 * @param maxLinesPerRow 405 * the maximum number of lines a row can span before an exception is thrown 406 * @return the updated Builder 407 */ 408 public Builder maxLinesPerRow(final int maxLinesPerRow) { 409 this.maxLinesPerRow = maxLinesPerRow; 410 return this; 411 } 412 413 /** 414 * Builds the CsvPreference instance. 415 * 416 * @return the immutable CsvPreference instance 417 */ 418 public CsvPreference build() { 419 420 if( encoder == null ) { 421 encoder = new DefaultCsvEncoder(); 422 } 423 424 if( quoteMode == null ) { 425 quoteMode = new NormalQuoteMode(); 426 } 427 428 return new CsvPreference(this); 429 } 430 431 } 432 433 }