View Javadoc
1   /*
2    * Copyright 2007 Kasper B. Graversen
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.supercsv.cellprocessor.constraint;
17  
18  import java.util.HashSet;
19  import java.util.Set;
20  
21  import org.supercsv.cellprocessor.CellProcessorAdaptor;
22  import org.supercsv.cellprocessor.ift.CellProcessor;
23  import org.supercsv.exception.SuperCsvCellProcessorException;
24  import org.supercsv.exception.SuperCsvConstraintViolationException;
25  import org.supercsv.util.CsvContext;
26  
27  /**
28   * Ensure that upon processing a CSV file (reading or writing), that values of the column are all unique. Comparison is
29   * based upon each elements <tt>hashCode()</tt> method and lookup takes O(1).
30   * <p>
31   * Compared to {@link Unique} this processor is much more memory efficient as it only stores the set of encountered
32   * hashcodes rather than storing references to all encountered objects. The tradeoff being possible false positives.
33   * <p>
34   * Prior to v1.50 this class was named <tt>Unique</tt> but has been renamed to clarify its inner workings.
35   * 
36   * @author Kasper B. Graversen
37   * @author Dominique De Vito
38   * @author James Bassett
39   */
40  public class UniqueHashCode extends CellProcessorAdaptor {
41  	
42  	private final Set<Integer> uniqueSet = new HashSet<Integer>();
43  	
44  	/**
45  	 * Constructs a new <tt>UniqueHashCode</tt> processor, which ensures that all rows in a column are unique.
46  	 */
47  	public UniqueHashCode() {
48  		super();
49  	}
50  	
51  	/**
52  	 * Constructs a new <tt>UniqueHashCode</tt> processor, which ensures that all rows in a column are unique, then
53  	 * calls the next processor in the chain.
54  	 * 
55  	 * @param next
56  	 *            the next processor in the chain
57  	 * @throws NullPointerException
58  	 *             if next is null
59  	 */
60  	public UniqueHashCode(final CellProcessor next) {
61  		super(next);
62  	}
63  	
64  	/**
65  	 * {@inheritDoc}
66  	 * 
67  	 * @throws SuperCsvCellProcessorException
68  	 *             if value is null
69  	 * @throws SuperCsvConstraintViolationException
70  	 *             if a non-unique value is encountered
71  	 */
72  	public Object execute(final Object value, final CsvContext context) {
73  		validateInputNotNull(value, context);
74  		
75  		int hash = value.hashCode();
76  		if( !uniqueSet.add(hash) ) {
77  			throw new SuperCsvConstraintViolationException(
78  				String.format("duplicate value '%s' encountered with hashcode %d", value, hash), context, this);
79  		}
80  		
81  		return next.execute(value, context);
82  	}
83  }