1 /*
2 * Copyright 2007 Kasper B. Graversen
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package org.supercsv.prefs;
17
18 import org.supercsv.comment.CommentMatcher;
19 import org.supercsv.encoder.CsvEncoder;
20 import org.supercsv.encoder.DefaultCsvEncoder;
21 import org.supercsv.quote.NormalQuoteMode;
22 import org.supercsv.quote.QuoteMode;
23
24 /**
25 * Before reading or writing CSV files, you must supply the reader/writer with some preferences.
26 * <p>
27 * <strong>Please note:</strong> the end of line symbols are <em>only</em> used for writing.
28 * </p>
29 * <table border="0" cellpadding="1" >
30 * <caption>Predefined configurations</caption> <tbody>
31 * <tr>
32 * <th align="left">Constant</th>
33 * <th align="left">Quote character</th>
34 * <th align="left">Delimiter character</th>
35 * <th align="left">End of line symbols</th>
36 * </tr>
37 * <tr>
38 * <td><code>STANDARD_PREFERENCE</code></td>
39 * <td><code>"</code></td>
40 * <td><code>,</code></td>
41 * <td><code>\r\n</code></td>
42 * </tr>
43 * <tr>
44 * <td><code>EXCEL_PREFERENCE</code></td>
45 * <td><code>"</code></td>
46 * <td><code>,</code></td>
47 * <td><code>\n</code></td>
48 * </tr>
49 * <tr>
50 * <td><code>EXCEL_NORTH_EUROPE_PREFERENCE</code></td>
51 * <td><code>"</code></td>
52 * <td><code>;</code></td>
53 * <td><code>\n</code></td>
54 * </tr>
55 * <tr>
56 * <td><code>TAB_PREFERENCE</code></td>
57 * <td><code>"</code></td>
58 * <td><code>\t</code></td>
59 * <td><code>\n</code></td>
60 * </tr>
61 * </tbody>
62 * </table>
63 * <p>
64 * By default, spaces surrounding an unquoted cell are treated as part of the data. In versions of Super CSV prior to
65 * 2.0.0 this wasn't the case, and any surrounding spaces that weren't within quotes were ignored when reading (and
66 * quotes were automatically added to Strings containing surrounding spaces when writing).
67 * </p>
68 * <p>
69 * If you wish to enable this functionality again, then you can create a CsvPreference with the
70 * <tt>surroundingSpacesNeedQuotes</tt> flag set to true (the default is false). This means that surrounding spaces
71 * without quotes will be trimmed when reading, and quotes will automatically be added for Strings containing
72 * surrounding spaces when writing.
73 * </p>
74 * <p>
75 * You can apply the surroundingSpacesNeedQuotes property to an existing preference as follows:
76 * </p>
77 * <p>
78 * {@code private static final CsvPreference STANDARD_SURROUNDING_SPACES_NEED_QUOTES = new CsvPreference.Builder(CsvPreference.STANDARD_PREFERENCE).surroundingSpacesNeedQuotes(true).build();}
79 * </p>
80 * <p>
81 * You can also create your own preferences. For example if your file was pipe-delimited, you could use the following:
82 * </p>
83 * <p>
84 * {@code private static final CsvPreference PIPE_DELIMITED = new CsvPreference.Builder('"', '|', "\n").build();}
85 * </p>
86 * Other preferences include:
87 * <ul>
88 * <li>using a custom {@link CsvEncoder} when writing CSV (if you want complete control over how the CSV is encoded)</li>
89 * <li>using a custom {@link QuoteMode} when writing CSV (if you want to enable quotes when they're not normally
90 * required). You can use the existing {@link org.supercsv.quote.AlwaysQuoteMode AlwaysQuoteMode},
91 * {@link org.supercsv.quote.ColumnQuoteMode ColumnQuoteMode}, or supply your own.</li>
92 * <li>using a custom {@link CommentMatcher} to skip comments when reading CSV. You can use the existing
93 * {@link org.supercsv.comment.CommentStartsWith CommentStartsWith}, {@link org.supercsv.comment.CommentMatches
94 * CommentMatches}, or supply your own.</li>
95 * <li>ignoring empty lines (enabled by default)</li>
96 * <li>setting the maximum number of lines a row of CSV can span (useful for debugging files with mismatched quotes)</li>
97 * </ul>
98 *
99 * @author Kasper B. Graversen
100 * @author James Bassett
101 */
102 public final class CsvPreference {
103
104 /**
105 * Ready to use configuration that should cover 99% of all usages.
106 */
107 public static final CsvPreference STANDARD_PREFERENCE = new CsvPreference.Builder('"', ',', "\r\n").build();
108
109 /**
110 * Ready to use configuration for Windows Excel exported CSV files.
111 */
112 public static final CsvPreference EXCEL_PREFERENCE = new CsvPreference.Builder('"', ',', "\n").build();
113
114 /**
115 * Ready to use configuration for north European excel CSV files (columns are separated by ";" instead of ",")
116 */
117 public static final CsvPreference EXCEL_NORTH_EUROPE_PREFERENCE = new CsvPreference.Builder('"', ';', "\n").build();
118
119 /**
120 * Ready to use configuration for tab-delimited files.
121 */
122 public static final CsvPreference TAB_PREFERENCE = new CsvPreference.Builder('"', '\t', "\n").build();
123
124 private final char quoteChar;
125
126 private final int delimiterChar;
127
128 private final String endOfLineSymbols;
129
130 private final boolean surroundingSpacesNeedQuotes;
131
132 private final boolean ignoreEmptyLines;
133
134 private final CsvEncoder encoder;
135
136 private final QuoteMode quoteMode;
137
138 private final CommentMatcher commentMatcher;
139
140 private int maxLinesPerRow = 0;
141
142 /**
143 * Constructs a new <tt>CsvPreference</tt> from a Builder.
144 */
145 private CsvPreference(Builder builder) {
146 this.quoteChar = builder.quoteChar;
147 this.delimiterChar = builder.delimiterChar;
148 this.endOfLineSymbols = builder.endOfLineSymbols;
149 this.surroundingSpacesNeedQuotes = builder.surroundingSpacesNeedQuotes;
150 this.ignoreEmptyLines = builder.ignoreEmptyLines;
151 this.commentMatcher = builder.commentMatcher;
152 this.encoder = builder.encoder;
153 this.quoteMode = builder.quoteMode;
154 this.maxLinesPerRow = builder.maxLinesPerRow;
155 }
156
157 /**
158 * Returns the delimiter character
159 *
160 * @return the delimiter character
161 */
162 public int getDelimiterChar() {
163 return delimiterChar;
164 }
165
166 /**
167 * Returns the end of line symbols
168 *
169 * @return the end of line symbols
170 */
171 public String getEndOfLineSymbols() {
172 return endOfLineSymbols;
173 }
174
175 /**
176 * Returns the quote character
177 *
178 * @return the quote character
179 */
180 public char getQuoteChar() {
181 return quoteChar;
182 }
183
184 /**
185 * Returns the surroundingSpacesNeedQuotes flag.
186 *
187 * @return the surroundingSpacesNeedQuotes flag
188 */
189 public boolean isSurroundingSpacesNeedQuotes() {
190 return surroundingSpacesNeedQuotes;
191 }
192
193 /**
194 * Returns the ignoreEmptyLines flag.
195 *
196 * @return the ignoreEmptyLines flag
197 */
198 public boolean isIgnoreEmptyLines() {
199 return ignoreEmptyLines;
200 }
201
202 /**
203 * Returns the CSV encoder.
204 *
205 * @return the CSV encoder
206 */
207 public CsvEncoder getEncoder() {
208 return encoder;
209 }
210
211 /**
212 * Returns the quote mode.
213 *
214 * @return the quote mode
215 */
216 public QuoteMode getQuoteMode() {
217 return quoteMode;
218 }
219
220 /**
221 * Returns the comment matcher.
222 *
223 * @return the comment matcher
224 */
225 public CommentMatcher getCommentMatcher() {
226 return commentMatcher;
227 }
228
229 /**
230 * Returns the maximum number of lines a row can span.
231 *
232 * @return the maximum number of lines a row can span
233 */
234 public int getMaxLinesPerRow() {
235 return maxLinesPerRow;
236 }
237
238 /**
239 * Builds immutable <tt>CsvPreference</tt> instances. The builder pattern allows for additional preferences to be
240 * added in the future.
241 */
242 public static class Builder {
243
244 private final char quoteChar;
245
246 private final int delimiterChar;
247
248 private final String endOfLineSymbols;
249
250 private boolean surroundingSpacesNeedQuotes = false;
251
252 private boolean ignoreEmptyLines = true;
253
254 private CsvEncoder encoder;
255
256 private QuoteMode quoteMode;
257
258 private CommentMatcher commentMatcher;
259
260 private int maxLinesPerRow = 0;
261
262 /**
263 * Constructs a Builder with all of the values from an existing <tt>CsvPreference</tt> instance. Useful if you
264 * want to base your preferences off one of the existing CsvPreference constants.
265 *
266 * @param preference
267 * the existing preference
268 */
269 public Builder(final CsvPreference preference) {
270 this.quoteChar = preference.quoteChar;
271 this.delimiterChar = preference.delimiterChar;
272 this.endOfLineSymbols = preference.endOfLineSymbols;
273 this.surroundingSpacesNeedQuotes = preference.surroundingSpacesNeedQuotes;
274 this.ignoreEmptyLines = preference.ignoreEmptyLines;
275 this.encoder = preference.encoder;
276 this.quoteMode = preference.quoteMode;
277 this.commentMatcher = preference.commentMatcher;
278 this.maxLinesPerRow = preference.maxLinesPerRow;
279 }
280
281 /**
282 * Constructs a Builder with the mandatory preference values.
283 *
284 * @param quoteChar
285 * matching pairs of this character are used to escape columns containing the delimiter
286 * @param delimiterChar
287 * the character separating each column
288 * @param endOfLineSymbols
289 * one or more symbols terminating the line, e.g. "\n". Only used for writing.
290 * @throws IllegalArgumentException
291 * if quoteChar and delimiterChar are the same character
292 * @throws NullPointerException
293 * if endOfLineSymbols is null
294 */
295 public Builder(final char quoteChar, final int delimiterChar, final String endOfLineSymbols) {
296 if( quoteChar == delimiterChar ) {
297 throw new IllegalArgumentException(String.format(
298 "quoteChar and delimiterChar should not be the same character: %c", quoteChar));
299 } else if( endOfLineSymbols == null ) {
300 throw new NullPointerException("endOfLineSymbols should not be null");
301 }
302 this.quoteChar = quoteChar;
303 this.delimiterChar = delimiterChar;
304 this.endOfLineSymbols = endOfLineSymbols;
305 }
306
307 /**
308 * Flag indicating whether spaces at the beginning or end of a cell should be ignored if they're not surrounded
309 * by quotes (applicable to both reading and writing CSV). The default is <tt>false</tt>, as spaces
310 * "are considered part of a field and should not be ignored" according to RFC 4180.
311 *
312 * @since 2.0.0
313 * @param surroundingSpacesNeedQuotes
314 * flag indicating whether spaces at the beginning or end of a cell should be ignored if they're not
315 * surrounded by quotes
316 * @return the updated Builder
317 */
318 public Builder surroundingSpacesNeedQuotes(final boolean surroundingSpacesNeedQuotes) {
319 this.surroundingSpacesNeedQuotes = surroundingSpacesNeedQuotes;
320 return this;
321 }
322
323 /**
324 * Flag indicating whether empty lines (i.e. containing only end of line symbols) should be ignored. The default
325 * is <tt>true</tt>.
326 *
327 * @since 2.2.1
328 * @param ignoreEmptyLines
329 * flag indicating whether empty lines should be ignored
330 * @return the updated Builder
331 */
332 public Builder ignoreEmptyLines(final boolean ignoreEmptyLines) {
333 this.ignoreEmptyLines = ignoreEmptyLines;
334 return this;
335 }
336
337 /**
338 * Enables the skipping of comments. You can supply your own comment matcher or use one of the predefined ones:
339 * {@link org.supercsv.comment.CommentStartsWith CommentStartsWith} or
340 * {@link org.supercsv.comment.CommentMatches CommentMatches}
341 *
342 * @since 2.1.0
343 * @param commentMatcher
344 * the comment matcher to use
345 * @return the updated Builder
346 * @throws NullPointerException
347 * if commentMatcher is null
348 */
349 public Builder skipComments(final CommentMatcher commentMatcher) {
350 if( commentMatcher == null ) {
351 throw new NullPointerException("commentMatcher should not be null");
352 }
353 this.commentMatcher = commentMatcher;
354 return this;
355 }
356
357 /**
358 * Uses a custom CsvEncoder to escape CSV for writing.
359 *
360 * @since 2.1.0
361 * @param encoder
362 * the custom encoder
363 * @return the updated Builder
364 * @throws NullPointerException
365 * if encoder is null
366 */
367 public Builder useEncoder(final CsvEncoder encoder) {
368 if( encoder == null ) {
369 throw new NullPointerException("encoder should not be null");
370 }
371 this.encoder = encoder;
372 return this;
373 }
374
375 /**
376 * Uses a custom QuoteMode to determine if surrounding quotes should be applied when writing (only applicable if
377 * a column doesn't contain any special characters and wouldn't otherwise be quoted). You can supply your own
378 * quote mode or use one of the predefined ones: {@link org.supercsv.quote.AlwaysQuoteMode AlwaysQuoteMode} or
379 * {@link org.supercsv.quote.ColumnQuoteMode ColumnQuoteMode}
380 *
381 * @since 2.1.0
382 * @param quoteMode
383 * the quote mode
384 * @return the updated Builder
385 * @throws NullPointerException
386 * if quoteMode is null
387 */
388 public Builder useQuoteMode(final QuoteMode quoteMode) {
389 if( quoteMode == null ) {
390 throw new NullPointerException("quoteMode should not be null");
391 }
392 this.quoteMode = quoteMode;
393 return this;
394 }
395
396 /**
397 * The maximum number of lines that a row can span before an exception is thrown (only applicable when reading
398 * CSV). This option allows CSV readers to fail fast when encountering CSV with mismatching quotes - the normal
399 * behaviour would be to continue reading until the matching quote is found, which could potentially mean
400 * reading the whole file (and exhausting all available memory). Zero or a negative value will disable this
401 * option. The default is <tt>0</tt>.
402 *
403 * @since 2.4.0
404 * @param maxLinesPerRow
405 * the maximum number of lines a row can span before an exception is thrown
406 * @return the updated Builder
407 */
408 public Builder maxLinesPerRow(final int maxLinesPerRow) {
409 this.maxLinesPerRow = maxLinesPerRow;
410 return this;
411 }
412
413 /**
414 * Builds the CsvPreference instance.
415 *
416 * @return the immutable CsvPreference instance
417 */
418 public CsvPreference build() {
419
420 if( encoder == null ) {
421 encoder = new DefaultCsvEncoder();
422 }
423
424 if( quoteMode == null ) {
425 quoteMode = new NormalQuoteMode();
426 }
427
428 return new CsvPreference(this);
429 }
430
431 }
432
433 }