001/* Copyright (C) 2002-2015 Sebastiano Vigna
002 *
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 *     http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. 
014 */
015package regexodus.ds;
016
017import java.util.Arrays;
018
/**
 * A hash map from primitive {@code char} keys to primitive {@code char} values with a fast,
 * small-footprint, open-addressing implementation.
 *
 * <p>Entries are stored in two parallel arrays of length {@code n + 1}, where {@code n} is a power of
 * two. Slots {@code 0 .. n-1} form the probing table, in which a key equal to {@code (char) 0} marks an
 * empty slot; the extra slot at index {@code n} is reserved for the entry whose key is
 * {@code (char) 0}, whose presence is tracked by {@link #containsNullKey}.
 *
 * <p>The table is filled up to a specified <em>load factor</em> and then grown to accommodate new
 * entries. When a removal leaves the map below <em>one fourth</em> of the fill threshold (and the table
 * is larger than {@link #DEFAULT_INITIAL_SIZE}), the table is halved.
 *
 * <p>Note that {@link #clear()} does not modify the table size. Rather, the {@linkplain #trim() trimming
 * methods} let you control the size of the table; this is particularly useful if you reuse instances of
 * this class.
 *
 * <p>This class performs no synchronization of its own.
 */
public class CharCharMap implements java.io.Serializable, Cloneable {
    private static final long serialVersionUID = 0L;
    private static final boolean ASSERTS = false;
    /**
     * The array of keys; slot {@code n} is reserved for the zero key.
     */
    protected transient char[] key;
    /**
     * The array of values, parallel to {@link #key}.
     */
    protected transient char[] value;
    /**
     * The mask for wrapping a position counter (always {@code n - 1}).
     */
    protected transient int mask;
    /**
     * Whether this map contains the key zero.
     */
    protected transient boolean containsNullKey;
    /**
     * The current table size (a power of two; the arrays have {@code n + 1} slots).
     */
    protected transient int n;
    /**
     * Threshold after which we rehash. It is computed from the table size and {@link #f} by
     * {@link #maxFill(int, float)}.
     */
    protected transient int maxFill;
    /**
     * Number of entries in the map (including the key zero, if present).
     */
    protected int size;
    /**
     * The acceptable load factor.
     */
    protected final float f;
    /**
     * Lazily created, cached key-set view.
     */
    protected transient volatile KeySet keys;

    /**
     * The default return value for <code>get()</code>, <code>put()</code> and <code>remove()</code>.
     */
    protected char defRetValue;

    /**
     * The initial default size of a hash table.
     */
    static final public int DEFAULT_INITIAL_SIZE = 16;
    /**
     * The default load factor of a hash table.
     */
    static final public float DEFAULT_LOAD_FACTOR = .75f;

    /**
     * Creates a new hash map.
     *
     * <p>The actual table size is the value returned by {@link #arraySize(int, float)}: the least power
     * of two (at least 2) greater than or equal to <code>expected</code>/<code>f</code>.
     *
     * @param expected the expected number of elements in the hash map.
     * @param f        the load factor; must satisfy {@code 0 < f <= 1}.
     * @throws IllegalArgumentException if {@code f} is not in {@code (0, 1]} (this includes
     *                                  {@code NaN}) or if {@code expected} is negative.
     */
    public CharCharMap(final int expected, final float f) {
        // Written as a negated conjunction so that NaN, for which every comparison is false, is rejected too.
        if (!(f > 0 && f <= 1)) {
            throw new IllegalArgumentException("Load factor must be greater than 0 and smaller than or equal to 1");
        }
        if (expected < 0) {
            throw new IllegalArgumentException("The expected number of elements must be nonnegative");
        }
        this.f = f;
        n = arraySize(expected, f);
        mask = n - 1;
        maxFill = maxFill(n, f);
        key = new char[n + 1];
        value = new char[n + 1];
    }

    /**
     * Creates a new hash map with 0.75f as load factor.
     *
     * @param expected the expected number of elements in the hash map.
     */
    public CharCharMap(final int expected) {
        this(expected, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates a new hash map with initial expected 16 entries and 0.75f as load factor.
     */
    public CharCharMap() {
        this(DEFAULT_INITIAL_SIZE, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates a new hash map using the elements of two parallel arrays.
     *
     * <p>Pairs are inserted in index order with {@link #put(char, char)}, so when a key occurs more than
     * once the value at the highest index wins.
     *
     * @param k the array of keys of the new hash map.
     * @param v the array of corresponding values in the new hash map.
     * @param f the load factor.
     * @throws IllegalArgumentException if <code>k</code> and <code>v</code> have different lengths.
     */
    public CharCharMap(final char[] k, final char[] v, final float f) {
        this(k.length, f);
        if (k.length != v.length) {
            throw new IllegalArgumentException("The key array and the value array have different lengths (" + k.length + " and " + v.length + ")");
        }
        for (int i = 0; i < k.length; i++) {
            this.put(k[i], v[i]);
        }
    }

    /**
     * Creates a new hash map with 0.75f as load factor using the elements of two parallel arrays.
     *
     * @param k the array of keys of the new hash map.
     * @param v the array of corresponding values in the new hash map.
     * @throws IllegalArgumentException if <code>k</code> and <code>v</code> have different lengths.
     */
    public CharCharMap(final char[] k, final char[] v) {
        this(k, v, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Sets the value returned by {@link #get(char)}, {@link #put(char, char)} and {@link #remove(char)}
     * when the key is not present.
     *
     * @param rv the new default return value.
     */
    public void defaultReturnValue(final char rv) {
        defRetValue = rv;
    }

    /**
     * Returns the value reported for absent keys.
     *
     * @return the current default return value.
     */
    public char defaultReturnValue() {
        return defRetValue;
    }

    /**
     * Returns the number of entries stored in the probing table, i.e. excluding the zero key.
     */
    private int realSize() {
        return containsNullKey ? size - 1 : size;
    }

    /**
     * Grows the table, if necessary, so that it can hold {@code capacity} entries without rehashing.
     */
    private void ensureCapacity(final int capacity) {
        final int needed = arraySize(capacity, f);
        if (needed > n) {
            rehash(needed);
        }
    }

    /**
     * Like {@link #ensureCapacity(int)}, but clamps the requested table size to 2<sup>30</sup> instead of failing.
     */
    private void tryCapacity(final long capacity) {
        final int needed = (int) Math.min(1 << 30, Math.max(2, HashCommon.nextPowerOfTwo((long) Math.ceil(capacity / f))));
        if (needed > n) {
            rehash(needed);
        }
    }

    /**
     * Locates a key.
     *
     * @param k the key to look up.
     * @return the index of the slot holding {@code k} ({@code n} for the zero key), or {@code -1} if the key is absent.
     */
    private int find(final char k) {
        if (k == 0) {
            return containsNullKey ? n : -1;
        }
        final char[] key = this.key;
        int pos = HashCommon.mix(k) & mask;
        char curr;
        // maxFill never exceeds n - 1, so there is always an empty slot that terminates the probe.
        while ((curr = key[pos]) != 0) {
            if (curr == k) {
                return pos;
            }
            pos = (pos + 1) & mask;
        }
        return -1;
    }

    /**
     * Removes the entry stored at a table position, closing the gap it leaves in its probe chain and
     * halving the table if the map has become sparse.
     *
     * @param pos a position in {@code 0 .. n-1} holding an entry.
     * @return the value that was stored at {@code pos}.
     */
    private char removeEntry(final int pos) {
        final char oldValue = value[pos];
        size--;
        shiftKeys(pos);
        if (size < maxFill / 4 && n > DEFAULT_INITIAL_SIZE) {
            rehash(n / 2);
        }
        return oldValue;
    }

    /**
     * Removes the entry for the zero key, halving the table if the map has become sparse.
     *
     * @return the value that was associated with the zero key.
     */
    private char removeNullEntry() {
        containsNullKey = false;
        final char oldValue = value[n];
        size--;
        if (size < maxFill / 4 && n > DEFAULT_INITIAL_SIZE) {
            rehash(n / 2);
        }
        return oldValue;
    }

    /**
     * Inserts a new entry if the key is absent.
     *
     * @param k the key.
     * @param v the value to store if {@code k} is absent.
     * @return {@code -1} if a new entry was added; otherwise the position of the existing entry, which
     * is left unmodified.
     */
    private int insert(final char k, final char v) {
        int pos;
        if (k == 0) {
            if (containsNullKey) {
                return n;
            }
            containsNullKey = true;
            pos = n;
        } else {
            final char[] key = this.key;
            char curr;
            pos = HashCommon.mix(k) & mask;
            while ((curr = key[pos]) != 0) {
                if (curr == k) {
                    return pos;
                }
                pos = (pos + 1) & mask;
            }
        }
        key[pos] = k;
        value[pos] = v;
        // The comparison uses the size before the increment; the new table size is computed from the size after it.
        if (size++ >= maxFill) {
            rehash(arraySize(size + 1, f));
        }
        return -1;
    }

    /**
     * Associates a value with a key.
     *
     * @param k the key.
     * @param v the value.
     * @return the value previously associated with {@code k}, or the
     * {@linkplain #defaultReturnValue() default return value} if there was none.
     */
    public char put(final char k, final char v) {
        final int pos = insert(k, v);
        if (pos < 0) {
            return defRetValue;
        }
        final char oldValue = value[pos];
        value[pos] = v;
        return oldValue;
    }

    /**
     * Shifts left entries with the specified hash code, starting at the specified position, and empties the resulting free entry.
     *
     * @param pos a starting position.
     */
    protected final void shiftKeys(int pos) {
        final char[] key = this.key;
        int last, slot;
        char curr;
        for (; ; ) {
            last = pos;
            pos = (pos + 1) & mask;
            for (; ; ) {
                curr = key[pos];
                if (curr == 0) {
                    // End of the probe chain: the gap can simply be marked empty.
                    key[last] = 0;
                    return;
                }
                slot = HashCommon.mix(curr) & mask;
                // Stop at the first entry whose home slot does not lie cyclically in (last, pos]:
                // it may be moved back into the gap without breaking its probe sequence.
                if (last <= pos ? last >= slot || slot > pos : last >= slot && slot > pos) {
                    break;
                }
                pos = (pos + 1) & mask;
            }
            key[last] = curr;
            value[last] = value[pos];
        }
    }

    /**
     * Removes the entry for a key, if present.
     *
     * @param k the key.
     * @return the value that was associated with {@code k}, or the
     * {@linkplain #defaultReturnValue() default return value} if there was none.
     */
    public char remove(final char k) {
        final int pos = find(k);
        if (pos < 0) {
            return defRetValue;
        }
        // Position n is reserved for the zero key; every probed position is smaller than n.
        return pos == n ? removeNullEntry() : removeEntry(pos);
    }

    /**
     * Returns the value associated with a key.
     *
     * @param k the key.
     * @return the value associated with {@code k}, or the
     * {@linkplain #defaultReturnValue() default return value} if there is none.
     */
    public char get(final char k) {
        final int pos = find(k);
        return pos < 0 ? defRetValue : value[pos];
    }

    /**
     * Checks whether a key is present.
     *
     * @param k the key.
     * @return true if this map contains an entry for {@code k}.
     */
    public boolean containsKey(final char k) {
        return find(k) >= 0;
    }

    /**
     * Checks whether some key maps to a value, by scanning the whole table.
     *
     * @param v the value.
     * @return true if at least one entry has value {@code v}.
     */
    public boolean containsValue(final char v) {
        final char[] value = this.value;
        final char[] key = this.key;
        if (containsNullKey && value[n] == v) {
            return true;
        }
        for (int i = n; i-- != 0; ) {
            if (key[i] != 0 && value[i] == v) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes all elements from this map.
     *
     * <p>To increase object reuse, this method does not change the table size. If you want to reduce
     * the table size, you must use {@link #trim()}.
     */
    public void clear() {
        if (size == 0) {
            return;
        }
        size = 0;
        containsNullKey = false;
        // Only the keys need resetting: a value is never read unless its key slot is occupied.
        Arrays.fill(key, (char) 0);
    }

    /**
     * Returns the number of entries in this map.
     *
     * @return the number of entries, including the zero key if present.
     */
    public int size() {
        return size;
    }

    /**
     * Checks whether this map has no entries.
     *
     * @return true if this map contains no entries.
     */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * A no-op for backward compatibility.
     *
     * @param growthFactor unused.
     * @deprecated Since <code>fastutil</code> 6.1.0, hash tables are doubled when they are too full.
     */
    @Deprecated
    public void growthFactor(int growthFactor) {
    }

    /**
     * Gets the (fixed) growth factor.
     *
     * <p>NOTE(review): the constant returned is 16 although tables are doubled; presumably this is a
     * legacy encoding of "doubling" kept for compatibility -- confirm before changing it.
     *
     * @return the constant 16.
     * @see #growthFactor(int)
     * @deprecated Since <code>fastutil</code> 6.1.0, hash tables are doubled when they are too full.
     */
    @Deprecated
    public int growthFactor() {
        return 16;
    }

    /**
     * A view of the keys of the enclosing map; every operation delegates to the map.
     */
    public final class KeySet {

        /**
         * @return the number of entries of the enclosing map.
         */
        public int size() {
            return size;
        }

        /**
         * @param k a key.
         * @return true if the enclosing map contains {@code k}.
         */
        public boolean contains(char k) {
            return containsKey(k);
        }

        /**
         * Removes a key (and its value) from the enclosing map.
         *
         * @param k a key.
         * @return true if the map contained {@code k}.
         */
        public boolean remove(char k) {
            final int oldSize = size;
            CharCharMap.this.remove(k);
            return size != oldSize;
        }

        /**
         * Clears the enclosing map.
         */
        public void clear() {
            CharCharMap.this.clear();
        }

        /**
         * Delegates to the corresponding type-specific method.
         *
         * @param o a {@link Character}; any other argument makes the cast fail.
         * @return true if the map contained the key.
         */
        public boolean remove(final Object o) {
            return remove(((Character) o).charValue());
        }
    }

    /**
     * Returns the key-set view of this map, creating it on first use.
     *
     * @return a view of the keys of this map.
     */
    public KeySet keySet() {
        KeySet view = keys;
        if (view == null) {
            view = new KeySet();
            keys = view;
        }
        return view;
    }

    /**
     * Rehashes the map, making the table as small as possible.
     *
     * <p>This method rehashes the table to the smallest size satisfying the load factor. It can be used
     * when the map will not be changed anymore, so to optimize access speed and size.
     *
     * <p>If the table size is already the minimum possible, this method does nothing.
     *
     * @return true if there was enough memory to trim the map.
     * @see #trim(int)
     */
    public boolean trim() {
        final int l = arraySize(size, f);
        if (l >= n || size > maxFill(l, f)) {
            return true;
        }
        try {
            rehash(l);
        } catch (OutOfMemoryError cantDoIt) {
            // Best effort: the map is left untouched, since rehash() allocates before it modifies any field.
            return false;
        }
        return true;
    }

    /**
     * Rehashes this map if the table is too large.
     *
     * <p>Let <var>N</var> be the smallest power of two not smaller than <code>n / f</code>. If the
     * current table size is smaller than or equal to <var>N</var>, or if <var>N</var> is too small to
     * hold the current entries within the load factor, this method does nothing. Otherwise, it rehashes
     * this map in a table of size <var>N</var>.
     *
     * <p>This method is useful when reusing maps. {@linkplain #clear() Clearing a map} leaves the table
     * size untouched. If you are reusing a map many times, you can call this method with a typical size
     * to avoid keeping around a very large table just because of a few large transient maps.
     *
     * @param n the threshold for the trimming.
     * @return true if there was enough memory to trim the map.
     * @see #trim()
     */
    public boolean trim(final int n) {
        final int l = HashCommon.nextPowerOfTwo((int) Math.ceil(n / f));
        // The parameter shadows the field: the comparison must be against the current table size.
        if (l >= this.n || size > maxFill(l, f)) {
            return true;
        }
        try {
            rehash(l);
        } catch (OutOfMemoryError cantDoIt) {
            return false;
        }
        return true;
    }

    /**
     * Rehashes the map.
     *
     * <p>This method implements the basic rehashing strategy, and may be overridden by subclasses
     * implementing different rehashing strategies (e.g., disk-based rehashing). However, you should not
     * override this method unless you understand the internal workings of this class.
     *
     * @param newN the new size; the code relies on it being a power of two large enough to leave at
     *             least one empty slot.
     */
    protected void rehash(final int newN) {
        final char[] key = this.key;
        final char[] value = this.value;
        final int mask = newN - 1;
        // Both arrays are allocated before any field is touched, so a failed allocation leaves the map intact.
        final char[] newKey = new char[newN + 1];
        final char[] newValue = new char[newN + 1];
        int i = n, pos;
        for (int j = realSize(); j-- != 0; ) {
            // Walk backwards to the next occupied slot of the old table.
            while (key[--i] == 0) {
                // keep scanning
            }
            pos = HashCommon.mix(key[i]) & mask;
            while (newKey[pos] != 0) {
                pos = (pos + 1) & mask;
            }
            newKey[pos] = key[i];
            newValue[pos] = value[i];
        }
        // Carry over the value slot reserved for the zero key.
        newValue[newN] = value[n];
        n = newN;
        this.mask = mask;
        maxFill = maxFill(n, f);
        this.key = newKey;
        this.value = newValue;
    }

    /**
     * Returns a deep copy of this map.
     *
     * <p>The copy has the same entries, load factor, table size and default return value as this map,
     * and shares no mutable state with it.
     *
     * @return a deep copy of this map.
     */
    @Override
    public CharCharMap clone() {
        final CharCharMap copy;
        try {
            copy = (CharCharMap) super.clone();
        } catch (CloneNotSupportedException cantHappen) {
            // This class implements Cloneable, so Object.clone() cannot refuse.
            throw new InternalError();
        }
        // The cached view is an inner-class instance bound to this map; the copy must build its own.
        copy.keys = null;
        copy.key = key.clone();
        copy.value = value.clone();
        return copy;
    }

    /**
     * Returns a hash code for this map.
     *
     * <p>The code is the sum, over all entries, of {@code key ^ value}; it therefore depends only on
     * the entries and not on the table layout. Note that <code>equals()</code> is not overridden by
     * this class, so two distinct maps with the same entries have equal hash codes but are not equal.
     *
     * @return a hash code for this map.
     */
    @Override
    public int hashCode() {
        int h = 0;
        for (int j = realSize(), i = 0; j-- != 0; i++) {
            while (key[i] == 0) {
                i++;
            }
            h += key[i] ^ value[i];
        }
        // The zero key contributes only its value (0 ^ value).
        if (containsNullKey) {
            h += value[n];
        }
        return h;
    }

    /**
     * Writes this map to a stream.
     *
     * @param s the stream.
     * @throws java.io.IOException if the stream fails.
     * @serialData The default serialized fields, followed by {@link #size} (key, value) pairs, each
     * written with {@code writeChar}.
     */
    private void writeObject(final java.io.ObjectOutputStream s) throws java.io.IOException {
        s.defaultWriteObject();
        final char[] key = this.key;
        final char[] value = this.value;
        if (containsNullKey) {
            s.writeChar(0);
            s.writeChar(value[n]);
        }
        for (int i = 0; i < n; i++) {
            if (key[i] != 0) {
                s.writeChar(key[i]);
                s.writeChar(value[i]);
            }
        }
    }

    /**
     * Rebuilds the (transient) table from the pairs written by {@code writeObject}.
     *
     * @param s the stream.
     * @throws java.io.IOException    if the stream fails or contains an invalid size or load factor.
     * @throws ClassNotFoundException if default deserialization fails to resolve a class.
     */
    private void readObject(final java.io.ObjectInputStream s) throws java.io.IOException, ClassNotFoundException {
        s.defaultReadObject();
        final int count = size;
        if (count < 0 || !(f > 0 && f <= 1)) {
            throw new java.io.InvalidObjectException("Invalid size (" + count + ") or load factor (" + f + ")");
        }
        n = arraySize(count, f);
        mask = n - 1;
        maxFill = maxFill(n, f);
        key = new char[n + 1];
        value = new char[n + 1];
        containsNullKey = false;
        // insert() recounts the entries as they are added.
        size = 0;
        for (int i = 0; i < count; i++) {
            final char k = s.readChar();
            final char v = s.readChar();
            final int pos = insert(k, v);
            if (pos >= 0) {
                // Tolerate a repeated key in the stream: the last value wins.
                value[pos] = v;
            }
        }
    }

    /**
     * Returns the maximum number of entries that can be filled before rehashing.
     *
     * @param n the size of the backing array.
     * @param f the load factor.
     * @return the maximum number of entries before rehashing.
     */
    public static int maxFill(final int n, final float f) {
        /* We must guarantee that there is always at least
         * one free entry (even with pathological load factors). */
        return Math.min((int) Math.ceil(n * f), n - 1);
    }

    /**
     * Returns the maximum number of entries that can be filled before rehashing.
     *
     * @param n the size of the backing array.
     * @param f the load factor.
     * @return the maximum number of entries before rehashing.
     */
    public static long maxFill(final long n, final float f) {
        /* We must guarantee that there is always at least
         * one free entry (even with pathological load factors). */
        return Math.min((long) Math.ceil(n * f), n - 1);
    }

    /**
     * Returns the least power of two that is at least 2 and larger than or equal to
     * <code>Math.ceil( expected / f )</code>.
     *
     * @param expected the expected number of elements in a hash table.
     * @param f        the load factor.
     * @return the minimum possible size for a backing array.
     * @throws IllegalArgumentException if the necessary size is larger than 2<sup>30</sup>.
     */
    public static int arraySize(final int expected, final float f) {
        final long s = Math.max(2, HashCommon.nextPowerOfTwo((long) Math.ceil(expected / f)));
        if (s > (1 << 30)) {
            throw new IllegalArgumentException("Too large (" + expected + " expected elements with load factor " + f + ")");
        }
        return (int) s;
    }

    /**
     * Static bit-mixing and sizing helpers used by the hash table.
     */
    private static final class HashCommon {

        private HashCommon() {
        }

        /**
         * This reference is used to fill keys and values of removed entries (if
         * they are objects). <code>null</code> cannot be used as it would confuse the
         * search algorithm in the presence of an actual <code>null</code> key.
         */
        public static final Object REMOVED = new Object();

        /**
         * 2<sup>32</sup> &middot; &phi;, &phi; = (&#x221A;5 &minus; 1)/2.
         */
        private static final int INT_PHI = 0x9E3779B9;
        /**
         * The reciprocal of {@link #INT_PHI} modulo 2<sup>32</sup>.
         */
        private static final int INV_INT_PHI = 0x144cbc89;
        /**
         * 2<sup>64</sup> &middot; &phi;, &phi; = (&#x221A;5 &minus; 1)/2.
         */
        private static final long LONG_PHI = 0x9E3779B97F4A7C15L;
        /**
         * The reciprocal of {@link #LONG_PHI} modulo 2<sup>64</sup>.
         */
        private static final long INV_LONG_PHI = 0xf1de83e19937733dL;

        /**
         * Avalanches the bits of an integer by applying the finalisation step of MurmurHash3.
         *
         * <p>This method implements the finalisation step of Austin Appleby's <a href="http://code.google.com/p/smhasher/">MurmurHash3</a>.
         * Its purpose is to avalanche the bits of the argument to within 0.25% bias.
         *
         * @param x an integer.
         * @return a hash value with good avalanching properties.
         */
        public static int murmurHash3(int x) {
            x ^= x >>> 16;
            x *= 0x85ebca6b;
            x ^= x >>> 13;
            x *= 0xc2b2ae35;
            x ^= x >>> 16;
            return x;
        }

        /**
         * Avalanches the bits of a long integer by applying the finalisation step of MurmurHash3.
         *
         * <p>This method implements the finalisation step of Austin Appleby's <a href="http://code.google.com/p/smhasher/">MurmurHash3</a>.
         * Its purpose is to avalanche the bits of the argument to within 0.25% bias.
         *
         * @param x a long integer.
         * @return a hash value with good avalanching properties.
         */
        public static long murmurHash3(long x) {
            x ^= x >>> 33;
            x *= 0xff51afd7ed558ccdL;
            x ^= x >>> 33;
            x *= 0xc4ceb9fe1a85ec53L;
            x ^= x >>> 33;
            return x;
        }

        /**
         * Quickly mixes the bits of an integer.
         *
         * <p>This method mixes the bits of the argument by multiplying by the golden ratio and
         * xorshifting the result. It is borrowed from <a href="https://github.com/OpenHFT/Koloboke">Koloboke</a>, and
         * it has slightly worse behaviour than {@link #murmurHash3(int)} (in open-addressing hash tables the average number of probes
         * is slightly larger), but it's much faster.
         *
         * @param x an integer.
         * @return a hash value obtained by mixing the bits of {@code x}.
         * @see #invMix(int)
         */
        public static int mix(final int x) {
            final int h = x * INT_PHI;
            return h ^ (h >>> 16);
        }

        /**
         * The inverse of {@link #mix(int)}. This method is mainly useful to create unit tests.
         *
         * @param x an integer.
         * @return a value that passed through {@link #mix(int)} would give {@code x}.
         */
        public static int invMix(final int x) {
            return (x ^ x >>> 16) * INV_INT_PHI;
        }

        /**
         * Quickly mixes the bits of a long integer.
         *
         * <p>This method mixes the bits of the argument by multiplying by the golden ratio and
         * xorshifting twice the result. It is borrowed from <a href="https://github.com/OpenHFT/Koloboke">Koloboke</a>, and
         * it has slightly worse behaviour than {@link #murmurHash3(long)} (in open-addressing hash tables the average number of probes
         * is slightly larger), but it's much faster.
         *
         * @param x a long integer.
         * @return a hash value obtained by mixing the bits of {@code x}.
         */
        public static long mix(final long x) {
            long h = x * LONG_PHI;
            h ^= h >>> 32;
            return h ^ (h >>> 16);
        }

        /**
         * The inverse of {@link #mix(long)}. This method is mainly useful to create unit tests.
         *
         * @param x a long integer.
         * @return a value that passed through {@link #mix(long)} would give {@code x}.
         */
        public static long invMix(long x) {
            x ^= x >>> 32;
            x ^= x >>> 16;
            return (x ^ x >>> 32) * INV_LONG_PHI;
        }

        /**
         * Returns the hash code that would be returned by {@link Float#hashCode()}.
         *
         * @param f a float.
         * @return the same code as {@link Float#hashCode() new Float(f).hashCode()}.
         */
        public static int float2int(final float f) {
            return Float.floatToIntBits(f);
        }

        /**
         * Returns the hash code that would be returned by {@link Double#hashCode()}.
         *
         * @param d a double.
         * @return the same code as {@link Double#hashCode() new Double(d).hashCode()}.
         */
        public static int double2int(final double d) {
            final long l = Double.doubleToLongBits(d);
            return (int) (l ^ (l >>> 32));
        }

        /**
         * Returns the hash code that would be returned by {@link Long#hashCode()}.
         *
         * @param l a long.
         * @return the same code as {@link Long#hashCode() new Long(l).hashCode()}.
         */
        public static int long2int(final long l) {
            return (int) (l ^ (l >>> 32));
        }

        /**
         * Return the least power of two greater than or equal to the specified value.
         *
         * <p>Note that this function will return 1 when the argument is 0.
         *
         * @param x an integer smaller than or equal to 2<sup>30</sup>.
         * @return the least power of two greater than or equal to the specified value.
         */
        public static int nextPowerOfTwo(int x) {
            if (x == 0) {
                return 1;
            }
            x--;
            x |= x >> 1;
            x |= x >> 2;
            x |= x >> 4;
            x |= x >> 8;
            return (x | x >> 16) + 1;
        }

        /**
         * Return the least power of two greater than or equal to the specified value.
         *
         * <p>Note that this function will return 1 when the argument is 0.
         *
         * @param x a long integer smaller than or equal to 2<sup>62</sup>.
         * @return the least power of two greater than or equal to the specified value.
         */
        public static long nextPowerOfTwo(long x) {
            if (x == 0) {
                return 1;
            }
            x--;
            x |= x >> 1;
            x |= x >> 2;
            x |= x >> 4;
            x |= x >> 8;
            x |= x >> 16;
            return (x | x >> 32) + 1;
        }

        /**
         * Returns the least power of two larger than or equal to <code>Math.ceil( expected / f )</code>.
         *
         * @param expected the expected number of elements in a hash table.
         * @param f        the load factor.
         * @return the minimum possible size for a backing big array.
         */
        public static long bigArraySize(final long expected, final float f) {
            return nextPowerOfTwo((long) Math.ceil(expected / f));
        }
    }
}