1 /*
2 * Copyright 2007 Kasper B. Graversen
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package org.supercsv.cellprocessor.constraint;
17
18 import java.util.HashSet;
19 import java.util.Set;
20
21 import org.supercsv.cellprocessor.CellProcessorAdaptor;
22 import org.supercsv.cellprocessor.ift.CellProcessor;
23 import org.supercsv.exception.SuperCsvCellProcessorException;
24 import org.supercsv.exception.SuperCsvConstraintViolationException;
25 import org.supercsv.util.CsvContext;
26
27 /**
28 * Ensure that upon processing a CSV file (reading or writing), that values of the column are all unique. Comparison is
29 * based upon each elements <tt>hashCode()</tt> method and lookup takes O(1).
30 * <p>
31 * Compared to {@link Unique} this processor is much more memory efficient as it only stores the set of encountered
32 * hashcodes rather than storing references to all encountered objects. The tradeoff being possible false positives.
33 * <p>
34 * Prior to v1.50 this class was named <tt>Unique</tt> but has been renamed to clarify its inner workings.
35 *
36 * @author Kasper B. Graversen
37 * @author Dominique De Vito
38 * @author James Bassett
39 */
40 public class UniqueHashCode extends CellProcessorAdaptor {
41
42 private final Set<Integer> uniqueSet = new HashSet<Integer>();
43
44 /**
45 * Constructs a new <tt>UniqueHashCode</tt> processor, which ensures that all rows in a column are unique.
46 */
47 public UniqueHashCode() {
48 super();
49 }
50
51 /**
52 * Constructs a new <tt>UniqueHashCode</tt> processor, which ensures that all rows in a column are unique, then
53 * calls the next processor in the chain.
54 *
55 * @param next
56 * the next processor in the chain
57 * @throws NullPointerException
58 * if next is null
59 */
60 public UniqueHashCode(final CellProcessor next) {
61 super(next);
62 }
63
64 /**
65 * {@inheritDoc}
66 *
67 * @throws SuperCsvCellProcessorException
68 * if value is null
69 * @throws SuperCsvConstraintViolationException
70 * if a non-unique value is encountered
71 */
72 public Object execute(final Object value, final CsvContext context) {
73 validateInputNotNull(value, context);
74
75 int hash = value.hashCode();
76 if( !uniqueSet.add(hash) ) {
77 throw new SuperCsvConstraintViolationException(
78 String.format("duplicate value '%s' encountered with hashcode %d", value, hash), context, this);
79 }
80
81 return next.execute(value, context);
82 }
83 }