View Javadoc
1   /*
2    * Copyright 2007 Kasper B. Graversen
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.supercsv.prefs;
17  
18  import org.supercsv.comment.CommentMatcher;
19  import org.supercsv.encoder.CsvEncoder;
20  import org.supercsv.encoder.DefaultCsvEncoder;
21  import org.supercsv.quote.NormalQuoteMode;
22  import org.supercsv.quote.QuoteMode;
23  
24  /**
25   * Before reading or writing CSV files, you must supply the reader/writer with some preferences.
26   * <p>
27   * <strong>Please note:</strong> the end of line symbols are <em>only</em> used for writing.
28   * </p>
29   * <table border="0" cellpadding="1" >
30   * <caption>Predefined configurations</caption> <tbody>
31   * <tr>
32   * <th align="left">Constant</th>
33   * <th align="left">Quote character</th>
34   * <th align="left">Delimiter character</th>
35   * <th align="left">End of line symbols</th>
36   * </tr>
37   * <tr>
38   * <td><code>STANDARD_PREFERENCE</code></td>
39   * <td><code>"</code></td>
40   * <td><code>,</code></td>
41   * <td><code>\r\n</code></td>
42   * </tr>
43   * <tr>
44   * <td><code>EXCEL_PREFERENCE</code></td>
45   * <td><code>"</code></td>
46   * <td><code>,</code></td>
47   * <td><code>\n</code></td>
48   * </tr>
49   * <tr>
50   * <td><code>EXCEL_NORTH_EUROPE_PREFERENCE</code></td>
51   * <td><code>"</code></td>
52   * <td><code>;</code></td>
53   * <td><code>\n</code></td>
54   * </tr>
55   * <tr>
56   * <td><code>TAB_PREFERENCE</code></td>
57   * <td><code>"</code></td>
58   * <td><code>\t</code></td>
59   * <td><code>\n</code></td>
60   * </tr>
61   * </tbody>
62   * </table>
63   * <p>
64   * By default, spaces surrounding an unquoted cell are treated as part of the data. In versions of Super CSV prior to
65   * 2.0.0 this wasn't the case, and any surrounding spaces that weren't within quotes were ignored when reading (and
66   * quotes were automatically added to Strings containing surrounding spaces when writing).
67   * </p>
68   * <p>
69   * If you wish enable this functionality again, then you can create a CsvPreference with the
70   * <tt>surroundingSpacesNeedQuotes</tt> flag set to true (the default is false). This means that surrounding spaces
71   * without quotes will be trimmed when reading, and quotes will automatically be added for Strings containing
72   * surrounding spaces when writing.
73   * </p>
74   * <p>
75   * You can apply the surroundingSpacesNeedQuotes property to an existing preference as follows:
76   * </p>
77   * <p>
78   * {@code private static final CsvPreference STANDARD_SURROUNDING_SPACES_NEED_QUOTES = new CsvPreference.Builder(CsvPreference.STANDARD_PREFERENCE).surroundingSpacesNeedQuotes(true).build();}
79   * </p>
80   * <p>
81   * You can also create your own preferences. For example if your file was pipe-delimited, you could use the following:
82   * </p>
83   * <p>
84   * {@code private static final CsvPreference PIPE_DELIMITED = new CsvPreference.Builder('"', '|', "\n").build();}
85   * </p>
86   * Other preferences incude:
87   * <ul>
88   * <li>using a custom {@link CsvEncoder} when writing CSV (if you want complete control over how the CSV is encoded)</li>
89   * <li>using a custom {@link QuoteMode} when writing CSV (if you want to enable quotes when they're not normally
90   * required). You can use the existing {@link org.supercsv.quote.AlwaysQuoteMode AlwaysQuoteMode},
91   * {@link org.supercsv.quote.ColumnQuoteMode ColumnQuoteMode}, or supply your own.</li>
92   * <li>using a custom {@link CommentMatcher} to skip comments when reading CSV. You can use the existing
93   * {@link org.supercsv.comment.CommentStartsWith CommentStartsWith}, {@link org.supercsv.comment.CommentMatches
94   * CommentMatches}, or supply your own.</li>
95   * <li>ignoring empty lines (enabled by default)</li>
96   * <li>setting the maximum number of lines a row of CSV can span (useful for debugging files with mismatched quotes)</li>
97   * </ul>
98   * 
99   * @author Kasper B. Graversen
100  * @author James Bassett
101  */
102 public final class CsvPreference {
103 	
104 	/**
105 	 * Ready to use configuration that should cover 99% of all usages.
106 	 */
107 	public static final CsvPreference STANDARD_PREFERENCE = new CsvPreference.Builder('"', ',', "\r\n").build();
108 	
109 	/**
110 	 * Ready to use configuration for Windows Excel exported CSV files.
111 	 */
112 	public static final CsvPreference EXCEL_PREFERENCE = new CsvPreference.Builder('"', ',', "\n").build();
113 	
114 	/**
115 	 * Ready to use configuration for north European excel CSV files (columns are separated by ";" instead of ",")
116 	 */
117 	public static final CsvPreference EXCEL_NORTH_EUROPE_PREFERENCE = new CsvPreference.Builder('"', ';', "\n").build();
118 	
119 	/**
120 	 * Ready to use configuration for tab-delimited files.
121 	 */
122 	public static final CsvPreference TAB_PREFERENCE = new CsvPreference.Builder('"', '\t', "\n").build();
123 	
124 	private final char quoteChar;
125 	
126 	private final int delimiterChar;
127 	
128 	private final String endOfLineSymbols;
129 	
130 	private final boolean surroundingSpacesNeedQuotes;
131 	
132 	private final boolean ignoreEmptyLines;
133 	
134 	private final CsvEncoder encoder;
135 	
136 	private final QuoteMode quoteMode;
137 	
138 	private final CommentMatcher commentMatcher;
139 	
140 	private int maxLinesPerRow = 0;
141 	
142 	/**
143 	 * Constructs a new <tt>CsvPreference</tt> from a Builder.
144 	 */
145 	private CsvPreference(Builder builder) {
146 		this.quoteChar = builder.quoteChar;
147 		this.delimiterChar = builder.delimiterChar;
148 		this.endOfLineSymbols = builder.endOfLineSymbols;
149 		this.surroundingSpacesNeedQuotes = builder.surroundingSpacesNeedQuotes;
150 		this.ignoreEmptyLines = builder.ignoreEmptyLines;
151 		this.commentMatcher = builder.commentMatcher;
152 		this.encoder = builder.encoder;
153 		this.quoteMode = builder.quoteMode;
154 		this.maxLinesPerRow = builder.maxLinesPerRow;
155 	}
156 	
157 	/**
158 	 * Returns the delimiter character
159 	 * 
160 	 * @return the delimiter character
161 	 */
162 	public int getDelimiterChar() {
163 		return delimiterChar;
164 	}
165 	
166 	/**
167 	 * Returns the end of line symbols
168 	 * 
169 	 * @return the end of line symbols
170 	 */
171 	public String getEndOfLineSymbols() {
172 		return endOfLineSymbols;
173 	}
174 	
175 	/**
176 	 * Returns the quote character
177 	 * 
178 	 * @return the quote character
179 	 */
180 	public char getQuoteChar() {
181 		return quoteChar;
182 	}
183 	
184 	/**
185 	 * Returns the surroundingSpacesNeedQuotes flag.
186 	 * 
187 	 * @return the surroundingSpacesNeedQuotes flag
188 	 */
189 	public boolean isSurroundingSpacesNeedQuotes() {
190 		return surroundingSpacesNeedQuotes;
191 	}
192 	
193 	/**
194 	 * Returns the ignoreEmptyLines flag.
195 	 * 
196 	 * @return the ignoreEmptyLines flag
197 	 */
198 	public boolean isIgnoreEmptyLines() {
199 		return ignoreEmptyLines;
200 	}
201 	
202 	/**
203 	 * Returns the CSV encoder.
204 	 * 
205 	 * @return the CSV encoder
206 	 */
207 	public CsvEncoder getEncoder() {
208 		return encoder;
209 	}
210 	
211 	/**
212 	 * Returns the quote mode.
213 	 * 
214 	 * @return the quote mode
215 	 */
216 	public QuoteMode getQuoteMode() {
217 		return quoteMode;
218 	}
219 	
220 	/**
221 	 * Returns the comment matcher.
222 	 * 
223 	 * @return the comment matcher
224 	 */
225 	public CommentMatcher getCommentMatcher() {
226 		return commentMatcher;
227 	}
228 	
229 	/**
230 	 * Returns the maximum number of lines a row can span.
231 	 *
232 	 * @return the maximum number of lines a row can span
233 	 */
234 	public int getMaxLinesPerRow() {
235 		return maxLinesPerRow;
236 	}
237 	
238 	/**
239 	 * Builds immutable <tt>CsvPreference</tt> instances. The builder pattern allows for additional preferences to be
240 	 * added in the future.
241 	 */
242 	public static class Builder {
243 		
244 		private final char quoteChar;
245 		
246 		private final int delimiterChar;
247 		
248 		private final String endOfLineSymbols;
249 		
250 		private boolean surroundingSpacesNeedQuotes = false;
251 		
252 		private boolean ignoreEmptyLines = true;
253 		
254 		private CsvEncoder encoder;
255 		
256 		private QuoteMode quoteMode;
257 		
258 		private CommentMatcher commentMatcher;
259 		
260 		private int maxLinesPerRow = 0;
261 		
262 		/**
263 		 * Constructs a Builder with all of the values from an existing <tt>CsvPreference</tt> instance. Useful if you
264 		 * want to base your preferences off one of the existing CsvPreference constants.
265 		 * 
266 		 * @param preference
267 		 *            the existing preference
268 		 */
269 		public Builder(final CsvPreference preference) {
270 			this.quoteChar = preference.quoteChar;
271 			this.delimiterChar = preference.delimiterChar;
272 			this.endOfLineSymbols = preference.endOfLineSymbols;
273 			this.surroundingSpacesNeedQuotes = preference.surroundingSpacesNeedQuotes;
274 			this.ignoreEmptyLines = preference.ignoreEmptyLines;
275 			this.encoder = preference.encoder;
276 			this.quoteMode = preference.quoteMode;
277 			this.commentMatcher = preference.commentMatcher;
278 			this.maxLinesPerRow = preference.maxLinesPerRow;
279 		}
280 		
281 		/**
282 		 * Constructs a Builder with the mandatory preference values.
283 		 * 
284 		 * @param quoteChar
285 		 *            matching pairs of this character are used to escape columns containing the delimiter
286 		 * @param delimiterChar
287 		 *            the character separating each column
288 		 * @param endOfLineSymbols
289 		 *            one or more symbols terminating the line, e.g. "\n". Only used for writing.
290 		 * @throws IllegalArgumentException
291 		 *             if quoteChar and delimiterChar are the same character
292 		 * @throws NullPointerException
293 		 *             if endOfLineSymbols is null
294 		 */
295 		public Builder(final char quoteChar, final int delimiterChar, final String endOfLineSymbols) {
296 			if( quoteChar == delimiterChar ) {
297 				throw new IllegalArgumentException(String.format(
298 					"quoteChar and delimiterChar should not be the same character: %c", quoteChar));
299 			} else if( endOfLineSymbols == null ) {
300 				throw new NullPointerException("endOfLineSymbols should not be null");
301 			}
302 			this.quoteChar = quoteChar;
303 			this.delimiterChar = delimiterChar;
304 			this.endOfLineSymbols = endOfLineSymbols;
305 		}
306 		
307 		/**
308 		 * Flag indicating whether spaces at the beginning or end of a cell should be ignored if they're not surrounded
309 		 * by quotes (applicable to both reading and writing CSV). The default is <tt>false</tt>, as spaces
310 		 * "are considered part of a field and should not be ignored" according to RFC 4180.
311 		 * 
312 		 * @since 2.0.0
313 		 * @param surroundingSpacesNeedQuotes
314 		 *            flag indicating whether spaces at the beginning or end of a cell should be ignored if they're not
315 		 *            surrounded by quotes
316 		 * @return the updated Builder
317 		 */
318 		public Builder surroundingSpacesNeedQuotes(final boolean surroundingSpacesNeedQuotes) {
319 			this.surroundingSpacesNeedQuotes = surroundingSpacesNeedQuotes;
320 			return this;
321 		}
322 		
323 		/**
324 		 * Flag indicating whether empty lines (i.e. containing only end of line symbols) should be ignored. The default
325 		 * is <tt>true</tt>.
326 		 * 
327 		 * @since 2.2.1
328 		 * @param ignoreEmptyLines
329 		 *            flag indicating whether empty lines should be ignored
330 		 * @return the updated Builder
331 		 */
332 		public Builder ignoreEmptyLines(final boolean ignoreEmptyLines) {
333 			this.ignoreEmptyLines = ignoreEmptyLines;
334 			return this;
335 		}
336 		
337 		/**
338 		 * Enables the skipping of comments. You can supply your own comment matcher or use one of the predefined ones:
339 		 * {@link org.supercsv.comment.CommentStartsWith CommentStartsWith} or
340 		 * {@link org.supercsv.comment.CommentMatches CommentMatches}
341 		 * 
342 		 * @since 2.1.0
343 		 * @param commentMatcher
344 		 *            the comment matcher to use
345 		 * @return the updated Builder
346 		 * @throws NullPointerException
347 		 *             if commentMatcher is null
348 		 */
349 		public Builder skipComments(final CommentMatcher commentMatcher) {
350 			if( commentMatcher == null ) {
351 				throw new NullPointerException("commentMatcher should not be null");
352 			}
353 			this.commentMatcher = commentMatcher;
354 			return this;
355 		}
356 		
357 		/**
358 		 * Uses a custom CsvEncoder to escape CSV for writing.
359 		 * 
360 		 * @since 2.1.0
361 		 * @param encoder
362 		 *            the custom encoder
363 		 * @return the updated Builder
364 		 * @throws NullPointerException
365 		 *             if encoder is null
366 		 */
367 		public Builder useEncoder(final CsvEncoder encoder) {
368 			if( encoder == null ) {
369 				throw new NullPointerException("encoder should not be null");
370 			}
371 			this.encoder = encoder;
372 			return this;
373 		}
374 		
375 		/**
376 		 * Uses a custom QuoteMode to determine if surrounding quotes should be applied when writing (only applicable if
377 		 * a column doesn't contain any special characters and wouldn't otherwise be quoted). You can supply your own
378 		 * quote mode or use one of the predefined ones: {@link org.supercsv.quote.AlwaysQuoteMode AlwaysQuoteMode} or
379 		 * {@link org.supercsv.quote.ColumnQuoteMode ColumnQuoteMode}
380 		 * 
381 		 * @since 2.1.0
382 		 * @param quoteMode
383 		 *            the quote mode
384 		 * @return the updated Builder
385 		 * @throws NullPointerException
386 		 *             if quoteMode is null
387 		 */
388 		public Builder useQuoteMode(final QuoteMode quoteMode) {
389 			if( quoteMode == null ) {
390 				throw new NullPointerException("quoteMode should not be null");
391 			}
392 			this.quoteMode = quoteMode;
393 			return this;
394 		}
395 		
396 		/**
397 		 * The maximum number of lines that a row can span before an exception is thrown (only applicable when reading
398 		 * CSV). This option allows CSV readers to fail fast when encountering CSV with mismatching quotes - the normal
399 		 * behaviour would be to continue reading until the matching quote is found, which could potentially mean
400 		 * reading the whole file (and exhausting all available memory). Zero or a negative value will disable this
401 		 * option. The default is <tt>0</tt>.
402 		 * 
403 		 * @since 2.4.0
404 		 * @param maxLinesPerRow
405 		 *            the maximum number of lines a row can span before an exception is thrown
406 		 * @return the updated Builder
407 		 */
408 		public Builder maxLinesPerRow(final int maxLinesPerRow) {
409 			this.maxLinesPerRow = maxLinesPerRow;
410 			return this;
411 		}
412 		
413 		/**
414 		 * Builds the CsvPreference instance.
415 		 * 
416 		 * @return the immutable CsvPreference instance
417 		 */
418 		public CsvPreference build() {
419 			
420 			if( encoder == null ) {
421 				encoder = new DefaultCsvEncoder();
422 			}
423 			
424 			if( quoteMode == null ) {
425 				quoteMode = new NormalQuoteMode();
426 			}
427 			
428 			return new CsvPreference(this);
429 		}
430 		
431 	}
432 	
433 }