/*
 * Copyright 2007 Kasper B. Graversen
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16  package org.supercsv.io;
17  
18  import static org.junit.Assert.assertEquals;
19  import static org.junit.Assert.assertFalse;
20  import static org.junit.Assert.assertNull;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.fail;
23  import static org.supercsv.prefs.CsvPreference.EXCEL_PREFERENCE;
24  
25  import java.io.IOException;
26  import java.io.Reader;
27  import java.io.StringReader;
28  import java.util.ArrayList;
29  import java.util.List;
30  
31  import org.junit.After;
32  import org.junit.Before;
33  import org.junit.Test;
34  import org.supercsv.comment.CommentMatches;
35  import org.supercsv.comment.CommentStartsWith;
36  import org.supercsv.exception.SuperCsvException;
37  import org.supercsv.prefs.CsvPreference;
38  
39  public class TokenizerTest {
40  	
41  	private static final CsvPreference NORMAL_PREFERENCE = EXCEL_PREFERENCE;
42  	private static final CsvPreference SPACES_NEED_QUOTES_PREFERENCE = new CsvPreference.Builder(EXCEL_PREFERENCE)
43  		.surroundingSpacesNeedQuotes(true).build();
44  	private static final CsvPreference DONT_IGNORE_EMPTY_LINES_PREFERENCE = new CsvPreference.Builder(EXCEL_PREFERENCE)
45  		.ignoreEmptyLines(false).build();
46  	
47  	private Tokenizer tokenizer;
48  	private List<String> columns;
49  	
50  	/**
51  	 * Sets up the columns List for the test.
52  	 * 
53  	 * @throws Exception
54  	 */
55  	@Before
56  	public void setUp() {
57  		columns = new ArrayList<String>();
58  	}
59  	
60  	/**
61  	 * Tidies up after the test.
62  	 */
63  	@After
64  	public void tearDown() throws IOException {
65  		if( tokenizer != null ) {
66  			tokenizer.close();
67  		}
68  	}
69  	
70  	/**
71  	 * Creates a Tokenizer with the input and preferences.
72  	 * 
73  	 * @param input
74  	 *            the input String
75  	 * @param preference
76  	 *            the preferences
77  	 * @return the Tokenizer
78  	 */
79  	private static Tokenizer createTokenizer(String input, CsvPreference preference) {
80  		final Reader r = input != null ? new StringReader(input) : null;
81  		return new Tokenizer(r, preference);
82  	}
83  	
84  	/**
85  	 * Tests the constructor with a null Reader (should throw an Exception).
86  	 */
87  	@Test(expected = NullPointerException.class)
88  	public void testConstructorWithNullReader() throws Exception {
89  		createTokenizer(null, NORMAL_PREFERENCE);
90  	}
91  	
92  	/**
93  	 * Tests the constructor with a null CsvPreference (should throw an Exception).
94  	 */
95  	@Test(expected = NullPointerException.class)
96  	public void testConstructorWithNullPreferences() throws Exception {
97  		createTokenizer("", null);
98  	}
99  	
100 	/**
101 	 * Tests the readColumns() method with null List (should throw an Exception).
102 	 */
103 	@Test(expected = NullPointerException.class)
104 	public void testReadColumnsWithNullList() throws Exception {
105 		tokenizer = createTokenizer("", NORMAL_PREFERENCE);
106 		tokenizer.readColumns(null);
107 	}
108 	
109 	/**
110 	 * Tests the getPreferences() method.
111 	 */
112 	@Test()
113 	public void testGetPreferences() throws Exception {
114 		tokenizer = createTokenizer("", NORMAL_PREFERENCE);
115 		CsvPreference prefs = tokenizer.getPreferences();
116 		assertEquals(NORMAL_PREFERENCE.getDelimiterChar(), prefs.getDelimiterChar());
117 		assertEquals(NORMAL_PREFERENCE.getEndOfLineSymbols(), prefs.getEndOfLineSymbols());
118 		assertEquals(NORMAL_PREFERENCE.getQuoteChar(), prefs.getQuoteChar());
119 		assertEquals(NORMAL_PREFERENCE.isSurroundingSpacesNeedQuotes(), prefs.isSurroundingSpacesNeedQuotes());
120 	}
121 	
122 	/**
123 	 * Tests the readColumns() method with no data.
124 	 */
125 	@Test
126 	public void testReadColumnsWithNoData() throws Exception {
127 		final String input = "";
128 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
129 		tokenizer.readColumns(columns);
130 		assertTrue(columns.isEmpty());
131 		assertEquals(input, tokenizer.getUntokenizedRow());
132 	}
133 	
134 	/**
135 	 * Tests that the readColumns() method skips over empty lines.
136 	 */
137 	@Test
138 	public void testEmptyLines() throws Exception {
139 		
140 		final String input = "\n\nthis is the third line\n";
141 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
142 		tokenizer.readColumns(columns);
143 		assertTrue(columns.size() == 1);
144 		assertEquals("this is the third line", columns.get(0));
145 		assertEquals(3, tokenizer.getLineNumber());
146 		assertEquals("this is the third line", tokenizer.getUntokenizedRow());
147 	}
148 	
149 	/**
150 	 * Tests that the readColumns() method doesn't skip over empty lines if the ignoreEmptyLines
151 	 * preference is disabled.
152 	 */
153 	@Test
154 	public void testEmptyLinesWithIgnoreEmptyLines() throws Exception {
155 		
156 		final String input = "\nthis is the second line\n\n";
157 		tokenizer = createTokenizer(input, DONT_IGNORE_EMPTY_LINES_PREFERENCE);
158 		tokenizer.readColumns(columns);
159 		assertTrue(columns.size() == 1);
160 		assertNull(columns.get(0));
161 		assertEquals(1, tokenizer.getLineNumber());
162 		assertEquals("", tokenizer.getUntokenizedRow());
163 		
164 		tokenizer.readColumns(columns);
165 		assertTrue(columns.size() == 1);
166 		assertEquals("this is the second line", columns.get(0));
167 		assertEquals(2, tokenizer.getLineNumber());
168 		assertEquals("this is the second line", tokenizer.getUntokenizedRow());
169 		
170 		tokenizer.readColumns(columns);
171 		assertTrue(columns.size() == 1);
172 		assertNull(columns.get(0));
173 		assertEquals(3, tokenizer.getLineNumber());
174 		assertEquals("", tokenizer.getUntokenizedRow());
175 	}
176 	
177 	/**
178 	 * Tests the readColumns() method a quoted section has text surrounding it. This is not technically valid CSV, but
179 	 * the tokenizer is lenient enough to allow it (it will just unescape the quoted section).
180 	 */
181 	@Test
182 	public void testQuotedFieldWithSurroundingText() throws Exception {
183 		
184 		final String input = "surrounding \"quoted\" text";
185 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
186 		tokenizer.readColumns(columns);
187 		assertTrue(columns.size() == 1);
188 		assertEquals("surrounding quoted text", columns.get(0));
189 		assertEquals(1, tokenizer.getLineNumber());
190 		assertEquals(input, tokenizer.getUntokenizedRow());
191 		
192 		// same result when surrounding spaces require quotes
193 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
194 		tokenizer.readColumns(columns);
195 		assertTrue(columns.size() == 1);
196 		assertEquals("surrounding quoted text", columns.get(0));
197 		assertEquals(1, tokenizer.getLineNumber());
198 		assertEquals(input, tokenizer.getUntokenizedRow());
199 	}
200 	
201 	/**
202 	 * Tests the readColumns() method when a quoted section with text after it. This is not technically valid CSV, but
203 	 * the tokenizer is lenient enough to allow it (it will just unescape the quoted section).
204 	 */
205 	@Test
206 	public void testQuotedFieldWithTextAfter() throws Exception {
207 		
208 		// illegal char after quoted section
209 		final String input = "\"quoted on 2 lines\nand afterward some\" text";
210 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
211 		tokenizer.readColumns(columns);
212 		assertEquals(1, columns.size());
213 		assertEquals("quoted on 2 lines\nand afterward some text", columns.get(0));
214 		assertEquals(2, tokenizer.getLineNumber());
215 		assertEquals(input, tokenizer.getUntokenizedRow());
216 		
217 		// should have exactly the same result when surrounding spaces need quotes
218 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
219 		tokenizer.readColumns(columns);
220 		assertEquals(1, columns.size());
221 		assertEquals("quoted on 2 lines\nand afterward some text", columns.get(0));
222 		assertEquals(2, tokenizer.getLineNumber());
223 		assertEquals(input, tokenizer.getUntokenizedRow());
224 	}
225 	
226 	/**
227 	 * Tests the readColumns() method with a single quoted newline.
228 	 */
229 	@Test
230 	public void testQuotedNewline() throws Exception {
231 		
232 		final String input = "\"\n\"";
233 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
234 		tokenizer.readColumns(columns);
235 		assertTrue(columns.size() == 1);
236 		assertEquals("\n", columns.get(0));
237 		assertEquals(input, tokenizer.getUntokenizedRow());
238 		
239 		// same input when surrounding spaces require quotes (results should be identical)
240 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
241 		tokenizer.readColumns(columns);
242 		assertTrue(columns.size() == 1);
243 		assertEquals("\n", columns.get(0));
244 		assertEquals(input, tokenizer.getUntokenizedRow());
245 	}
246 	
247 	/**
248 	 * Tests the readColumns() method with a variety of quoted newlines.
249 	 */
250 	@Test
251 	public void testQuotedNewlines() throws Exception {
252 		
253 		final String input = "\"one line\",\"two\nlines\",\"three\nlines\n!\"";
254 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
255 		tokenizer.readColumns(columns);
256 		assertTrue(columns.size() == 3);
257 		assertEquals("one line", columns.get(0));
258 		assertEquals("two\nlines", columns.get(1));
259 		assertEquals("three\nlines\n!", columns.get(2));
260 		assertEquals(input, tokenizer.getUntokenizedRow());
261 		
262 		// same input when surrounding spaces require quotes (results should be identical)
263 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
264 		tokenizer.readColumns(columns);
265 		assertTrue(columns.size() == 3);
266 		assertEquals("one line", columns.get(0));
267 		assertEquals("two\nlines", columns.get(1));
268 		assertEquals("three\nlines\n!", columns.get(2));
269 		assertEquals(input, tokenizer.getUntokenizedRow());
270 	}
271 	
272 	
273 	/**
274 	 * Tests the readColumns() method when a quoted field has consecutive newlines.
275 	 */
276 	@Test
277 	public void testQuotedTextWithConsecutiveNewLines() throws Exception {
278 		
279 		// second field has consecutive newlines
280 		final String input = "one, \"multiline\n\n\ntext\"";
281 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
282 		tokenizer.readColumns(columns);
283 		assertEquals(2, columns.size());
284 		assertEquals("one", columns.get(0));
285 		assertEquals(" multiline\n\n\ntext", columns.get(1));
286 		assertEquals(4, tokenizer.getLineNumber());
287 		assertEquals(input, tokenizer.getUntokenizedRow());
288 		
289 		// should have exactly the same result when surrounding spaces need quotes
290 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
291 		tokenizer.readColumns(columns);
292 		assertTrue(columns.size() == 2);
293 		assertEquals("one", columns.get(0));
294 		assertEquals("multiline\n\n\ntext", columns.get(1));
295 		assertEquals(input, tokenizer.getUntokenizedRow());
296 	}
297 	
298 	/**
299 	 * Tests the readColumns() method when EOF is reached within quote scope.
300 	 */
301 	@Test
302 	public void testQuotedFieldWithUnexpectedEOF() throws Exception {
303 		
304 		// EOF reached within quote scope
305 		final String input = "\"quoted spanning\ntwo lines with EOF reached before another quote";
306 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
307 		try {
308 			tokenizer.readColumns(columns);
309 			fail("should have thrown SuperCsvException");
310 		}
311 		catch(SuperCsvException e) {
312 			assertEquals("unexpected end of file while reading quoted column beginning on line 1 and ending on line 2",
313 				e.getMessage());
314 		}
315 	}
316 
317 	/**
318 	 * Tests the readColumns() method when a newline is reached in quote
319 	 * scoped when a single line is only supposed to be read
320 	 */
321 	@Test
322 	public void testQuotedFieldWithUnexpectedNewline() throws Exception {
323 
324 		// Row 2 has a missing trailing quote
325 		final String input = "col1,col2\n" +
326 				"\"foo\",\"bar\n" +
327 				"\"baz\",\"zoo\"\n" +
328 				"\"aaa\",\"bbb\"";
329 		CsvPreference pref = new CsvPreference.Builder(NORMAL_PREFERENCE)
330 				.maxLinesPerRow(1).build();
331 
332 		tokenizer = createTokenizer(input, pref);
333 		try {
334 			boolean first = tokenizer.readColumns(columns);
335 			assertEquals(true , first);
336 
337 			tokenizer.readColumns(columns);
338 			fail("should have thrown SuperCsvException");
339 		}
340 		catch(SuperCsvException e) {
341 			assertEquals("unexpected end of line while reading quoted column on line 2",
342 					e.getMessage());
343 		}
344 	}
345 
346 	/**
347 	 * Tests the readColumns() method when a newline is reached in quote
348 	 * scoped when two lines are only supposed to be read
349 	 */
350 	@Test
351 	public void testQuotedFieldWithTwoMaxLines() throws Exception {
352 
353 		// Row 2 has a missing trailing quote
354 		final String input = "col1,col2\n" +
355 				"\"foo\",\"bar\n" +
356 				"baz,zoo\n" +
357 				"aaa,bbb";
358 		CsvPreference pref = new CsvPreference.Builder(NORMAL_PREFERENCE)
359 				.maxLinesPerRow(2).build();
360 
361 		tokenizer = createTokenizer(input, pref);
362 		try {
363 			boolean first = tokenizer.readColumns(columns);
364 			assertEquals(true , first);
365 
366 			boolean second = tokenizer.readColumns(columns);
367 			assertEquals(true , second);
368 
369 			tokenizer.readColumns(columns);
370 			fail("should have thrown SuperCsvException");
371 		}
372 		catch(SuperCsvException e) {
373 			assertEquals("max number of lines to read exceeded while reading quoted column beginning on line 2 and ending on line 3",
374 					e.getMessage());
375 		}
376 	}
377 
378 	@Test
379 	public void testQuotedFieldWithUnexpectedNewlineNoNextLineRead() throws Exception {
380 
381 		// Row 2 has a missing trailing quote
382 		final String input = "col1,col2\n" +
383 				"\"foo\",\"bar\n" +
384 				"\"baz\",\"zoo\"\n" +
385 				"\"aaa\",\"bbb\"";
386 		CsvPreference pref = new CsvPreference.Builder(NORMAL_PREFERENCE)
387 				.maxLinesPerRow(1).build();
388 
389 		tokenizer = createTokenizer(input, pref);
390 		try {
391 			final boolean first = tokenizer.readColumns(columns);
392 			assertEquals(true , first);
393 			assertEquals("[col1, col2]" , columns.toString());
394 
395 			tokenizer.readColumns(columns);
396 			fail("should have thrown SuperCsvException");
397 		}
398 		catch(SuperCsvException e) {
399 			assertEquals("unexpected end of line while reading quoted column on line 2",
400 					e.getMessage());
401 		}
402 		final boolean third = tokenizer.readColumns(columns);
403 		assertEquals(true , third);
404 		assertEquals("[baz, zoo]" , columns.toString());
405 
406 		final boolean fourth = tokenizer.readColumns(columns);
407 		assertEquals(true , fourth);
408 		assertEquals("[aaa, bbb]" , columns.toString());
409 
410 		//line 4 was the last 
411 		final boolean fifth = tokenizer.readColumns(columns);
412 		assertEquals(false , fifth);
413 	}
414 
415 	/**
416 	 * Tests the readColumns() method when a newline is reached in quote
417 	 * scoped when two lines are only supposed to be read
418 	 */
419 	@Test
420 	public void testQuotedFieldWithTwoMaxLinesNoMoreLinesRead() throws Exception {
421 
422 		// Row 2 has a missing trailing quote
423 		final String input = "col1,col2\n" +
424 				"\"foo,bar\n" +
425 				"baz,zoo\n" +
426 				"aaa,bbb";
427 		CsvPreference pref = new CsvPreference.Builder(NORMAL_PREFERENCE)
428 				.maxLinesPerRow(2).build();
429 
430 		tokenizer = createTokenizer(input, pref);
431 		try {
432 			boolean first = tokenizer.readColumns(columns);
433 			assertEquals(true , first);
434 			assertEquals("[col1, col2]" , columns.toString());
435 			
436 
437 			boolean second = tokenizer.readColumns(columns);
438 			assertEquals(true , second);
439 			assertEquals("[\"foo,bar]" , columns.toString());
440 
441 
442 			tokenizer.readColumns(columns);
443 			fail("should have thrown SuperCsvException");
444 		}
445 		catch(SuperCsvException e) {
446 			assertEquals("max number of lines to read exceeded while reading quoted column beginning on line 2 and ending on line 3",
447 					e.getMessage());
448 		}
449 		boolean fourth = tokenizer.readColumns(columns);
450 		assertEquals(true , fourth);
451 		assertEquals("[aaa, bbb]" , columns.toString());
452 		
453 	}
454 
455 	/**
456 	 * Tests the readColumns() method with a leading space before the first quoted field. This is not technically valid
457 	 * CSV, but the tokenizer is lenient enough to allow it. The leading spaces will be trimmed off when surrounding
458 	 * spaces require quotes, otherwise they will be part of the field.
459 	 */
460 	@Test
461 	public void testQuotedFirstFieldWithLeadingSpace() throws Exception {
462 		
463 		// leading spaces should be preserved
464 		final String input = "  \"quoted with leading spaces\",two";
465 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
466 		tokenizer.readColumns(columns);
467 		assertTrue(columns.size() == 2);
468 		assertEquals("  quoted with leading spaces", columns.get(0));
469 		assertEquals("two", columns.get(1));
470 		assertEquals(input, tokenizer.getUntokenizedRow());
471 		
472 		// same input when surrounding spaces require quotes (leading spaces trimmed)
473 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
474 		tokenizer.readColumns(columns);
475 		assertTrue(columns.size() == 2);
476 		assertEquals("quoted with leading spaces", columns.get(0));
477 		assertEquals("two", columns.get(1));
478 		assertEquals(input, tokenizer.getUntokenizedRow());
479 	}
480 	
481 	/**
482 	 * Tests the readColumns() method with a leading space before the last quoted field. This is not technically valid
483 	 * CSV, but the tokenizer is lenient enough to allow it. The leading spaces will be trimmed off when surrounding
484 	 * spaces require quotes, otherwise they will be part of the field.
485 	 */
486 	@Test
487 	public void testQuotedLastFieldWithLeadingSpace() throws Exception {
488 		
489 		// last field has a leading space before quote (should be preserved)
490 		final String input = "one,two,  \"quoted with leading spaces\"";
491 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
492 		tokenizer.readColumns(columns);
493 		assertTrue(columns.size() == 3);
494 		assertEquals("one", columns.get(0));
495 		assertEquals("two", columns.get(1));
496 		assertEquals("  quoted with leading spaces", columns.get(2));
497 		assertEquals(input, tokenizer.getUntokenizedRow());
498 		
499 		// leading space should be trimmed off
500 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
501 		tokenizer.readColumns(columns);
502 		assertTrue(columns.size() == 3);
503 		assertEquals("one", columns.get(0));
504 		assertEquals("two", columns.get(1));
505 		assertEquals("quoted with leading spaces", columns.get(2));
506 		assertEquals(input, tokenizer.getUntokenizedRow());
507 	}
508 	
509 	/**
510 	 * Tests the readColumns() method with a trailing space after the first quoted field. This is not technically valid
511 	 * CSV, but the tokenizer is lenient enough to allow it. The trailing spaces will be trimmed off when surrounding
512 	 * spaces require quotes, otherwise they will be part of the field.
513 	 */
514 	@Test
515 	public void testQuotedFirstFieldWithTrailingSpace() throws Exception {
516 		
517 		// first field has a leading space before quote (should be preserved)
518 		final String input = "\"quoted with trailing spaces\"  ,two";
519 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
520 		tokenizer.readColumns(columns);
521 		assertTrue(columns.size() == 2);
522 		assertEquals("quoted with trailing spaces  ", columns.get(0));
523 		assertEquals("two", columns.get(1));
524 		assertEquals(input, tokenizer.getUntokenizedRow());
525 		
526 		// trailing spaces should be trimmed
527 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
528 		tokenizer.readColumns(columns);
529 		assertTrue(columns.size() == 2);
530 		assertEquals("quoted with trailing spaces", columns.get(0));
531 		assertEquals("two", columns.get(1));
532 		assertEquals(input, tokenizer.getUntokenizedRow());
533 	}
534 	
535 	/**
536 	 * Tests the readColumns() method with a trailing space after the last quoted field. This is not technically valid
537 	 * CSV, but the tokenizer is lenient enough to allow it. The trailing spaces will be trimmed off when surrounding
538 	 * spaces require quotes, otherwise they will be part of the field.
539 	 */
540 	@Test
541 	public void testQuotedLastFieldWithTrailingSpace() throws Exception {
542 		
543 		// last field has a leading space before quote (should be preserved)
544 		final String input = "one,two,\"quoted with trailing spaces\"  ";
545 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
546 		tokenizer.readColumns(columns);
547 		assertTrue(columns.size() == 3);
548 		assertEquals("one", columns.get(0));
549 		assertEquals("two", columns.get(1));
550 		assertEquals("quoted with trailing spaces  ", columns.get(2));
551 		assertEquals(input, tokenizer.getUntokenizedRow());
552 		
553 		// trailing spaces should be trimmed off
554 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
555 		tokenizer.readColumns(columns);
556 		assertTrue(columns.size() == 3);
557 		assertEquals("one", columns.get(0));
558 		assertEquals("two", columns.get(1));
559 		assertEquals("quoted with trailing spaces", columns.get(2));
560 		assertEquals(input, tokenizer.getUntokenizedRow());
561 	}
562 	
563 	/**
564 	 * Tests the readColumns() method with a variety of quoted spaces.
565 	 */
566 	@Test
567 	public void testQuotedSpaces() throws Exception {
568 		
569 		final String input = "\" one \",\"  two  \",\"   three   \"";
570 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
571 		tokenizer.readColumns(columns);
572 		assertTrue(columns.size() == 3);
573 		assertEquals(" one ", columns.get(0));
574 		assertEquals("  two  ", columns.get(1));
575 		assertEquals("   three   ", columns.get(2));
576 		assertEquals(input, tokenizer.getUntokenizedRow());
577 		
578 		// same input when surrounding spaces require quotes (results should be identical)
579 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
580 		tokenizer.readColumns(columns);
581 		assertTrue(columns.size() == 3);
582 		assertEquals(" one ", columns.get(0));
583 		assertEquals("  two  ", columns.get(1));
584 		assertEquals("   three   ", columns.get(2));
585 		assertEquals(input, tokenizer.getUntokenizedRow());
586 	}
587 	
588 	/**
589 	 * Tests the readColumns() method with a variety of unquoted spaces.
590 	 */
591 	@Test
592 	public void testSpaces() throws Exception {
593 		
594 		final String input = " one ,  two  ,   three   ";
595 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
596 		tokenizer.readColumns(columns);
597 		assertTrue(columns.size() == 3);
598 		assertEquals(" one ", columns.get(0));
599 		assertEquals("  two  ", columns.get(1));
600 		assertEquals("   three   ", columns.get(2));
601 		assertEquals(input, tokenizer.getUntokenizedRow());
602 		
603 		// same input when surrounding spaces require quotes
604 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
605 		tokenizer.readColumns(columns);
606 		assertTrue(columns.size() == 3);
607 		assertEquals("one", columns.get(0));
608 		assertEquals("two", columns.get(1));
609 		assertEquals("three", columns.get(2));
610 		assertEquals(input, tokenizer.getUntokenizedRow());
611 	}
612 	
613 	/**
614 	 * Tests the readColumns() method with a variety of spaces and tabs.
615 	 */
616 	@Test
617 	public void testSpacesAndTabs() throws Exception {
618 		
619 		// tabs should never be trimmed
620 		final String input = "\t, \tone\t ,  \ttwo\t  ,   \tthree\t   ";
621 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
622 		tokenizer.readColumns(columns);
623 		assertTrue(columns.size() == 4);
624 		assertEquals("\t", columns.get(0));
625 		assertEquals(" \tone\t ", columns.get(1));
626 		assertEquals("  \ttwo\t  ", columns.get(2));
627 		assertEquals("   \tthree\t   ", columns.get(3));
628 		assertEquals(input, tokenizer.getUntokenizedRow());
629 		
630 		// same input when surrounding spaces require quotes
631 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
632 		tokenizer.readColumns(columns);
633 		assertTrue(columns.size() == 4);
634 		assertEquals("\t", columns.get(0));
635 		assertEquals("\tone\t", columns.get(1));
636 		assertEquals("\ttwo\t", columns.get(2));
637 		assertEquals("\tthree\t", columns.get(3));
638 		assertEquals(input, tokenizer.getUntokenizedRow());
639 	}
640 	
641 	/**
642 	 * Tests the readColumns() method with spaces between words.
643 	 */
644 	@Test
645 	public void testSpacesBetweenWords() throws Exception {
646 		
647 		final String input = " one partridge ,  two turtle doves  ,   three french hens   ";
648 		tokenizer = createTokenizer(input, NORMAL_PREFERENCE);
649 		tokenizer.readColumns(columns);
650 		assertTrue(columns.size() == 3);
651 		assertEquals(" one partridge ", columns.get(0));
652 		assertEquals("  two turtle doves  ", columns.get(1));
653 		assertEquals("   three french hens   ", columns.get(2));
654 		assertEquals(input, tokenizer.getUntokenizedRow());
655 		
656 		// same input when surrounding spaces require quotes
657 		tokenizer = createTokenizer(input, SPACES_NEED_QUOTES_PREFERENCE);
658 		tokenizer.readColumns(columns);
659 		assertTrue(columns.size() == 3);
660 		assertEquals("one partridge", columns.get(0));
661 		assertEquals("two turtle doves", columns.get(1));
662 		assertEquals("three french hens", columns.get(2));
663 		assertEquals(input, tokenizer.getUntokenizedRow());
664 	}
665 	
666 	/**
667 	 * Tests that the CommentStartsWith comment matcher works (comments are skipped).
668 	 */
669 	@Test
670 	public void testSkipCommentsStartsWith() throws IOException {
671 		
672 		final CsvPreference commentsStartWithPrefs = new CsvPreference.Builder(EXCEL_PREFERENCE).skipComments(
673 			new CommentStartsWith("#")).build();
674 		
675 		final String input = "#comment\nnot,a,comment\n# another comment\nalso,not,comment";
676 		final Tokenizer tokenizer = createTokenizer(input, commentsStartWithPrefs);
677 		tokenizer.readColumns(columns);
678 		assertTrue(columns.size() == 3);
679 		assertEquals("not", columns.get(0));
680 		assertEquals("a", columns.get(1));
681 		assertEquals("comment", columns.get(2));
682 		
683 		tokenizer.readColumns(columns);
684 		assertTrue(columns.size() == 3);
685 		assertEquals("also", columns.get(0));
686 		assertEquals("not", columns.get(1));
687 		assertEquals("comment", columns.get(2));
688 		
689 		assertFalse(tokenizer.readColumns(columns));
690 	}
691 	
692 	/**
693 	 * Tests that the CommentMatches comment matcher works (comments are skipped).
694 	 */
695 	@Test
696 	public void testSkipCommentsMatches() throws IOException {
697 		
698 		final CsvPreference commentsMatchesPrefs = new CsvPreference.Builder(EXCEL_PREFERENCE).skipComments(
699 			new CommentMatches("<!--.*-->")).build();
700 		
701 		final String input = "<!--comment-->\nnot,a,comment\n<!-- another comment-->\nalso,not,comment";
702 		final Tokenizer tokenizer = createTokenizer(input, commentsMatchesPrefs);
703 		tokenizer.readColumns(columns);
704 		assertTrue(columns.size() == 3);
705 		assertEquals("not", columns.get(0));
706 		assertEquals("a", columns.get(1));
707 		assertEquals("comment", columns.get(2));
708 		
709 		tokenizer.readColumns(columns);
710 		assertTrue(columns.size() == 3);
711 		assertEquals("also", columns.get(0));
712 		assertEquals("not", columns.get(1));
713 		assertEquals("comment", columns.get(2));
714 		
715 		assertFalse(tokenizer.readColumns(columns));
716 		
717 	}
718 	
719 }