001 package org.maltparser.core.helper;
002
003 /*
004 * Copyright 2009 Google Inc.
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
007 * use this file except in compliance with the License. You may obtain a copy of
008 * the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
014 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
015 * License for the specific language governing permissions and limitations under
016 * the License.
017 */
018 import java.io.IOException;
019 import java.io.ObjectInputStream;
020 import java.io.ObjectOutputStream;
021 import java.io.Serializable;
022 import java.lang.reflect.Array;
023 import java.util.AbstractSet;
024 import java.util.Collection;
025 import java.util.Iterator;
026 import java.util.NoSuchElementException;
027
028 /**
029 * A memory-efficient hash set.
030 *
031 * @param <E> the element type
032 */
033 public class HashSet<E> extends AbstractSet<E> implements Serializable {
034 private static final long serialVersionUID = 7526471155622776147L;
035 private class SetIterator implements Iterator<E> {
036 private int index = 0;
037 private int last = -1;
038
039 public SetIterator() {
040 advanceToItem();
041 }
042
043 public boolean hasNext() {
044 return index < table.length;
045 }
046
047 @SuppressWarnings("unchecked")
048 public E next() {
049 if (!hasNext()) {
050 throw new NoSuchElementException();
051 }
052 last = index;
053 E toReturn = (E) unmaskNull(table[index++]);
054 advanceToItem();
055 return toReturn;
056 }
057
058 public void remove() {
059 if (last < 0) {
060 throw new IllegalStateException();
061 }
062 internalRemove(last);
063 if (table[last] != null) {
064 index = last;
065 }
066 last = -1;
067 }
068
069 private void advanceToItem() {
070 for (; index < table.length; ++index) {
071 if (table[index] != null) {
072 return;
073 }
074 }
075 }
076 }
077
078 /**
079 * In the interest of memory-savings, we start with the smallest feasible
080 * power-of-two table size that can hold three items without rehashing. If we
081 * started with a size of 2, we'd have to expand as soon as the second item
082 * was added.
083 */
084 private static final int INITIAL_TABLE_SIZE = 4;
085
086 private static final Object NULL_ITEM = new Serializable() {
087 Object readResolve() {
088 return NULL_ITEM;
089 }
090 };
091
092 static Object maskNull(Object o) {
093 return (o == null) ? NULL_ITEM : o;
094 }
095
096 static Object unmaskNull(Object o) {
097 return (o == NULL_ITEM) ? null : o;
098 }
099
100 /**
101 * Number of objects in this set; transient due to custom serialization.
102 * Default access to avoid synthetic accessors from inner classes.
103 */
104 transient int size = 0;
105
106 /**
107 * Backing store for all the objects; transient due to custom serialization.
108 * Default access to avoid synthetic accessors from inner classes.
109 */
110 transient Object[] table;
111
112 public HashSet() {
113 table = new Object[INITIAL_TABLE_SIZE];
114 }
115
116 public HashSet(Collection<? extends E> c) {
117 int newCapacity = INITIAL_TABLE_SIZE;
118 int expectedSize = c.size();
119 while (newCapacity * 3 < expectedSize * 4) {
120 newCapacity <<= 1;
121 }
122
123 table = new Object[newCapacity];
124 super.addAll(c);
125 }
126
127 @Override
128 public boolean add(E e) {
129 ensureSizeFor(size + 1);
130 int index = findOrEmpty(e);
131 if (table[index] == null) {
132 ++size;
133 table[index] = maskNull(e);
134 return true;
135 }
136 return false;
137 }
138
139 @Override
140 public boolean addAll(Collection<? extends E> c) {
141 ensureSizeFor(size + c.size());
142 return super.addAll(c);
143 }
144
145 @Override
146 public void clear() {
147 table = new Object[INITIAL_TABLE_SIZE];
148 size = 0;
149 }
150
151 @Override
152 public boolean contains(Object o) {
153 return find(o) >= 0;
154 }
155
156 @Override
157 public Iterator<E> iterator() {
158 return new SetIterator();
159 }
160
161 @Override
162 public boolean remove(Object o) {
163 int index = find(o);
164 if (index < 0) {
165 return false;
166 }
167 internalRemove(index);
168 return true;
169 }
170
171 @Override
172 public int size() {
173 return size;
174 }
175
176 @Override
177 public Object[] toArray() {
178 return toArray(new Object[size]);
179 }
180
181 @SuppressWarnings("unchecked")
182 @Override
183 public <T> T[] toArray(T[] a) {
184 if (a.length < size) {
185 a = (T[]) Array.newInstance(a.getClass().getComponentType(), size);
186 }
187 int index = 0;
188 for (int i = 0; i < table.length; ++i) {
189 Object e = table[i];
190 if (e != null) {
191 a[index++] = (T) unmaskNull(e);
192 }
193 }
194 while (index < a.length) {
195 a[index++] = null;
196 }
197 return a;
198 }
199
200 /**
201 * Adapted from org.apache.commons.collections.map.AbstractHashedMap.
202 */
203 @SuppressWarnings("unchecked")
204 protected void doReadObject(ObjectInputStream in) throws IOException,
205 ClassNotFoundException {
206 table = new Object[in.readInt()];
207 int items = in.readInt();
208 for (int i = 0; i < items; i++) {
209 add((E) in.readObject());
210 }
211 }
212
213 /**
214 * Adapted from org.apache.commons.collections.map.AbstractHashedMap.
215 */
216 protected void doWriteObject(ObjectOutputStream out) throws IOException {
217 out.writeInt(table.length);
218 out.writeInt(size);
219 for (int i = 0; i < table.length; ++i) {
220 Object e = table[i];
221 if (e != null) {
222 out.writeObject(unmaskNull(e));
223 }
224 }
225 }
226
227 /**
228 * Returns whether two items are equal for the purposes of this set.
229 */
230 protected boolean itemEquals(Object a, Object b) {
231 return (a == null) ? (b == null) : a.equals(b);
232 }
233
234 /**
235 * Return the hashCode for an item.
236 */
237 protected int itemHashCode(Object o) {
238 return (o == null) ? 0 : o.hashCode();
239 }
240
241 /**
242 * Works just like {@link #addAll(Collection)}, but for arrays. Used to avoid
243 * having to synthesize a collection in {@link Sets}.
244 */
245 void addAll(E[] elements) {
246 ensureSizeFor(size + elements.length);
247 for (E e : elements) {
248 int index = findOrEmpty(e);
249 if (table[index] == null) {
250 ++size;
251 table[index] = maskNull(e);
252 }
253 }
254 }
255
256 /**
257 * Removes the item at the specified index, and performs internal management
258 * to make sure we don't wind up with a hole in the table. Default access to
259 * avoid synthetic accessors from inner classes.
260 */
261 void internalRemove(int index) {
262 table[index] = null;
263 --size;
264 plugHole(index);
265 }
266
267 /**
268 * Ensures the set is large enough to contain the specified number of entries.
269 */
270 private void ensureSizeFor(int expectedSize) {
271 if (table.length * 3 >= expectedSize * 4) {
272 return;
273 }
274
275 int newCapacity = table.length << 1;
276 while (newCapacity * 3 < expectedSize * 4) {
277 newCapacity <<= 1;
278 }
279
280 Object[] oldTable = table;
281 table = new Object[newCapacity];
282 for (Object o : oldTable) {
283 if (o != null) {
284 int newIndex = getIndex(unmaskNull(o));
285 while (table[newIndex] != null) {
286 if (++newIndex == table.length) {
287 newIndex = 0;
288 }
289 }
290 table[newIndex] = o;
291 }
292 }
293 }
294
295 /**
296 * Returns the index in the table at which a particular item resides, or -1 if
297 * the item is not in the table.
298 */
299 private int find(Object o) {
300 int index = getIndex(o);
301 while (true) {
302 Object existing = table[index];
303 if (existing == null) {
304 return -1;
305 }
306 if (itemEquals(o, unmaskNull(existing))) {
307 return index;
308 }
309 if (++index == table.length) {
310 index = 0;
311 }
312 }
313 }
314
315 /**
316 * Returns the index in the table at which a particular item resides, or the
317 * index of an empty slot in the table where this item should be inserted if
318 * it is not already in the table.
319 */
320 private int findOrEmpty(Object o) {
321 int index = getIndex(o);
322 while (true) {
323 Object existing = table[index];
324 if (existing == null) {
325 return index;
326 }
327 if (itemEquals(o, unmaskNull(existing))) {
328 return index;
329 }
330 if (++index == table.length) {
331 index = 0;
332 }
333 }
334 }
335
336 private int getIndex(Object o) {
337 int h = itemHashCode(o);
338 // Copied from Apache's AbstractHashedMap; prevents power-of-two collisions.
339 h += ~(h << 9);
340 h ^= (h >>> 14);
341 h += (h << 4);
342 h ^= (h >>> 10);
343 // Power of two trick.
344 return h & (table.length - 1);
345 }
346
347 /**
348 * Tricky, we left a hole in the map, which we have to fill. The only way to
349 * do this is to search forwards through the map shuffling back values that
350 * match this index until we hit a null.
351 */
352 private void plugHole(int hole) {
353 int index = hole + 1;
354 if (index == table.length) {
355 index = 0;
356 }
357 while (table[index] != null) {
358 int targetIndex = getIndex(unmaskNull(table[index]));
359 if (hole < index) {
360 /*
361 * "Normal" case, the index is past the hole and the "bad range" is from
362 * hole (exclusive) to index (inclusive).
363 */
364 if (!(hole < targetIndex && targetIndex <= index)) {
365 // Plug it!
366 table[hole] = table[index];
367 table[index] = null;
368 hole = index;
369 }
370 } else {
371 /*
372 * "Wrapped" case, the index is before the hole (we've wrapped) and the
373 * "good range" is from index (exclusive) to hole (inclusive).
374 */
375 if (index < targetIndex && targetIndex <= hole) {
376 // Plug it!
377 table[hole] = table[index];
378 table[index] = null;
379 hole = index;
380 }
381 }
382 if (++index == table.length) {
383 index = 0;
384 }
385 }
386 }
387
388 private void readObject(ObjectInputStream in) throws IOException,
389 ClassNotFoundException {
390 in.defaultReadObject();
391 doReadObject(in);
392 }
393
394 private void writeObject(ObjectOutputStream out) throws IOException {
395 out.defaultWriteObject();
396 doWriteObject(out);
397 }
398 }