001/* Copyright (C) 2002-2015 Sebastiano Vigna 002 * 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. 014 */ 015package regexodus.ds; 016 017import java.util.Arrays; 018 019/** 020 * A type-specific hash map with a fast, small-footprint implementation. 021 * <p> 022 * <P>Instances of this class use a hash table to represent a map. The table is filled up to a specified <em>load factor</em>, and then doubled in size to accommodate new entries. If the table is 023 * emptied below <em>one fourth</em> of the load factor, it is halved in size. However, halving is not performed when deleting entries from an iterator, as it would interfere with the iteration 024 * process. 025 * <p> 026 * <p>Note that {@link #clear()} does not modify the hash table size. Rather, a family of {@linkplain #trim() trimming methods} lets you control the size of the table; this is particularly useful if 027 * you reuse instances of this class. 028 */ 029public class CharCharMap implements java.io.Serializable, Cloneable { 030 private static final long serialVersionUID = 0L; 031 private static final boolean ASSERTS = false; 032 /** 033 * The array of keys. 034 */ 035 protected transient char[] key; 036 /** 037 * The array of values. 038 */ 039 protected transient char[] value; 040 /** 041 * The mask for wrapping a position counter. 042 */ 043 protected transient int mask; 044 /** 045 * Whether this set contains the key zero. 046 */ 047 protected transient boolean containsNullKey; 048 /** 049 * The current table size. 050 */ 051 protected transient int n; 052 /** 053 * Threshold after which we rehash. It must be the table size times {@link #f}. 054 */ 055 protected transient int maxFill; 056 /** 057 * Number of entries in the set (including the key zero, if present). 058 */ 059 protected int size; 060 /** 061 * The acceptable load factor. 062 */ 063 protected final float f; 064 /** 065 * Cached set of keys. 066 */ 067 protected transient volatile KeySet keys; 068 069 /** 070 * The default return value for <code>get()</code>, <code>put()</code> and <code>remove()</code>. 071 */ 072 protected char defRetValue; 073 074 /** 075 * The initial default size of a hash table. 076 */ 077 static final public int DEFAULT_INITIAL_SIZE = 16; 078 /** 079 * The default load factor of a hash table. 080 */ 081 static final public float DEFAULT_LOAD_FACTOR = .75f; 082 083 /** 084 * Creates a new hash map. 085 * <p> 086 * <p>The actual table size will be the least power of two greater than <code>expected</code>/<code>f</code>. 087 * 088 * @param expected the expected number of elements in the hash set. 089 * @param f the load factor. 090 */ 091 092 public CharCharMap(final int expected, final float f) { 093 if (f <= 0 || f > 1) 094 throw new IllegalArgumentException("Load factor must be greater than 0 and smaller than or equal to 1"); 095 if (expected < 0) throw new IllegalArgumentException("The expected number of elements must be nonnegative"); 096 this.f = f; 097 n = arraySize(expected, f); 098 mask = n - 1; 099 maxFill = maxFill(n, f); 100 key = new char[n + 1]; 101 value = new char[n + 1]; 102 } 103 104 /** 105 * Creates a new hash map with 0.75f as load factor. 106 * 107 * @param expected the expected number of elements in the hash map. 108 */ 109 public CharCharMap(final int expected) { 110 this(expected, DEFAULT_LOAD_FACTOR); 111 } 112 113 /** 114 * Creates a new hash map with initial expected 16 entries and 0.75f as load factor. 115 */ 116 public CharCharMap() { 117 this(DEFAULT_INITIAL_SIZE, DEFAULT_LOAD_FACTOR); 118 } 119 120 /** 121 * Creates a new hash map using the elements of two parallel arrays. 122 * 123 * @param k the array of keys of the new hash map. 124 * @param v the array of corresponding values in the new hash map. 125 * @param f the load factor. 126 * @throws IllegalArgumentException if <code>k</code> and <code>v</code> have different lengths. 127 */ 128 public CharCharMap(final char[] k, final char[] v, final float f) { 129 this(k.length, f); 130 if (k.length != v.length) 131 throw new IllegalArgumentException("The key array and the value array have different lengths (" + k.length + " and " + v.length + ")"); 132 for (int i = 0; i < k.length; i++) 133 this.put(k[i], v[i]); 134 } 135 136 /** 137 * Creates a new hash map with 0.75f as load factor using the elements of two parallel arrays. 138 * 139 * @param k the array of keys of the new hash map. 140 * @param v the array of corresponding values in the new hash map. 141 * @throws IllegalArgumentException if <code>k</code> and <code>v</code> have different lengths. 142 */ 143 public CharCharMap(final char[] k, final char[] v) { 144 this(k, v, DEFAULT_LOAD_FACTOR); 145 } 146 147 public void defaultReturnValue(final char rv) { 148 defRetValue = rv; 149 } 150 151 public char defaultReturnValue() { 152 return defRetValue; 153 } 154 155 private int realSize() { 156 return containsNullKey ? size - 1 : size; 157 } 158 159 private void ensureCapacity(final int capacity) { 160 final int needed = arraySize(capacity, f); 161 if (needed > n) rehash(needed); 162 } 163 164 private void tryCapacity(final long capacity) { 165 final int needed = (int) Math.min(1 << 30, Math.max(2, HashCommon.nextPowerOfTwo((long) Math.ceil(capacity / f)))); 166 if (needed > n) rehash(needed); 167 } 168 169 private char removeEntry(final int pos) { 170 final char oldValue = value[pos]; 171 size--; 172 shiftKeys(pos); 173 if (size < maxFill / 4 && n > DEFAULT_INITIAL_SIZE) rehash(n / 2); 174 return oldValue; 175 } 176 177 private char removeNullEntry() { 178 containsNullKey = false; 179 final char oldValue = value[n]; 180 size--; 181 if (size < maxFill / 4 && n > DEFAULT_INITIAL_SIZE) rehash(n / 2); 182 return oldValue; 183 } 184 185 private int insert(final char k, final char v) { 186 int pos; 187 if (((k) == ((char) 0))) { 188 if (containsNullKey) return n; 189 containsNullKey = true; 190 pos = n; 191 } else { 192 char curr; 193 final char[] key = this.key; 194 // The starting point. 195 if (!((curr = key[pos = (HashCommon.mix((k))) & mask]) == ((char) 0))) { 196 if (((curr) == (k))) return pos; 197 while (!((curr = key[pos = (pos + 1) & mask]) == ((char) 0))) 198 if (((curr) == (k))) return pos; 199 } 200 } 201 key[pos] = k; 202 value[pos] = v; 203 if (size++ >= maxFill) rehash(arraySize(size + 1, f)); 204 return -1; 205 } 206 207 public char put(final char k, final char v) { 208 final int pos = insert(k, v); 209 if (pos < 0) return defRetValue; 210 final char oldValue = value[pos]; 211 value[pos] = v; 212 return oldValue; 213 } 214 215 /** 216 * Shifts left entries with the specified hash code, starting at the specified position, and empties the resulting free entry. 217 * 218 * @param pos a starting position. 219 */ 220 protected final void shiftKeys(int pos) { 221 // Shift entries with the same hash. 222 int last, slot; 223 char curr; 224 final char[] key = this.key; 225 for (; ; ) { 226 pos = ((last = pos) + 1) & mask; 227 for (; ; ) { 228 if (((curr = key[pos]) == ((char) 0))) { 229 key[last] = ((char) 0); 230 return; 231 } 232 slot = (HashCommon.mix((curr))) & mask; 233 if (last <= pos ? last >= slot || slot > pos : last >= slot && slot > pos) break; 234 pos = (pos + 1) & mask; 235 } 236 key[last] = curr; 237 value[last] = value[pos]; 238 } 239 } 240 241 public char remove(final char k) { 242 if (((k) == ((char) 0))) { 243 if (containsNullKey) return removeNullEntry(); 244 return defRetValue; 245 } 246 char curr; 247 final char[] key = this.key; 248 int pos; 249 // The starting point. 250 if (((curr = key[pos = (HashCommon.mix((k))) & mask]) == ((char) 0))) return defRetValue; 251 if (((k) == (curr))) return removeEntry(pos); 252 while (true) { 253 if (((curr = key[pos = (pos + 1) & mask]) == ((char) 0))) return defRetValue; 254 if (((k) == (curr))) return removeEntry(pos); 255 } 256 } 257 258 public char get(final char k) { 259 if (((k) == ((char) 0))) return containsNullKey ? value[n] : defRetValue; 260 char curr; 261 final char[] key = this.key; 262 int pos; 263 // The starting point. 264 if (((curr = key[pos = (HashCommon.mix((k))) & mask]) == ((char) 0))) return defRetValue; 265 if (((k) == (curr))) return value[pos]; 266 // There's always an unused entry. 267 while (true) { 268 if (((curr = key[pos = (pos + 1) & mask]) == ((char) 0))) return defRetValue; 269 if (((k) == (curr))) return value[pos]; 270 } 271 } 272 273 public boolean containsKey(final char k) { 274 if (((k) == ((char) 0))) return containsNullKey; 275 char curr; 276 final char[] key = this.key; 277 int pos; 278 // The starting point. 279 if (((curr = key[pos = (HashCommon.mix((k))) & mask]) == ((char) 0))) return false; 280 if (((k) == (curr))) return true; 281 // There's always an unused entry. 282 while (true) { 283 if (((curr = key[pos = (pos + 1) & mask]) == ((char) 0))) return false; 284 if (((k) == (curr))) return true; 285 } 286 } 287 288 public boolean containsValue(final char v) { 289 final char value[] = this.value; 290 final char key[] = this.key; 291 if (containsNullKey && ((value[n]) == (v))) return true; 292 for (int i = n; i-- != 0; ) 293 if (!((key[i]) == ((char) 0)) && ((value[i]) == (v))) return true; 294 return false; 295 } 296 297 /* Removes all elements from this map. 298 * 299 * <P>To increase object reuse, this method does not change the table size. If you want to reduce the table size, you must use {@link #trim()}. */ 300 public void clear() { 301 if (size == 0) return; 302 size = 0; 303 containsNullKey = false; 304 Arrays.fill(key, ((char) 0)); 305 } 306 307 public int size() { 308 return size; 309 } 310 311 public boolean isEmpty() { 312 return size == 0; 313 } 314 315 /** 316 * A no-op for backward compatibility. 317 * 318 * @param growthFactor unused. 319 * @deprecated Since <code>fastutil</code> 6.1.0, hash tables are doubled when they are too full. 320 */ 321 @Deprecated 322 public void growthFactor(int growthFactor) { 323 } 324 325 /** 326 * Gets the growth factor (2). 327 * 328 * @return the growth factor of this set, which is fixed (2). 329 * @see #growthFactor(int) 330 * @deprecated Since <code>fastutil</code> 6.1.0, hash tables are doubled when they are too full. 331 */ 332 @Deprecated 333 public int growthFactor() { 334 return 16; 335 } 336 337 private final class KeySet { 338 339 public int size() { 340 return size; 341 } 342 343 public boolean contains(char k) { 344 return containsKey(k); 345 } 346 347 public boolean remove(char k) { 348 final int oldSize = size; 349 CharCharMap.this.remove(k); 350 return size != oldSize; 351 } 352 353 public void clear() { 354 CharCharMap.this.clear(); 355 } 356 357 /** 358 * Delegates to the corresponding type-specific method. 359 */ 360 public boolean remove(final Object o) { 361 return remove(((((Character) (o)).charValue()))); 362 } 363 } 364 365 public KeySet keySet() { 366 if (keys == null) keys = new KeySet(); 367 return keys; 368 } 369 370 /** 371 * Rehashes the map, making the table as small as possible. 372 * <p> 373 * <P>This method rehashes the table to the smallest size satisfying the load factor. It can be used when the set will not be changed anymore, so to optimize access speed and size. 374 * <p> 375 * <P>If the table size is already the minimum possible, this method does nothing. 376 * 377 * @return true if there was enough memory to trim the map. 378 * @see #trim(int) 379 */ 380 public boolean trim() { 381 final int l = arraySize(size, f); 382 if (l >= n || size > maxFill(l, f)) return true; 383 try { 384 rehash(l); 385 } catch (Error cantDoIt) { 386 return false; 387 } 388 return true; 389 } 390 391 /** 392 * Rehashes this map if the table is too large. 393 * <p> 394 * <P>Let <var>N</var> be the smallest table size that can hold <code>max(n,{@link #size()})</code> entries, still satisfying the load factor. If the current table size is smaller than or equal to 395 * <var>N</var>, this method does nothing. Otherwise, it rehashes this map in a table of size <var>N</var>. 396 * <p> 397 * <P>This method is useful when reusing maps. {@linkplain #clear() Clearing a map} leaves the table size untouched. If you are reusing a map many times, you can call this method with a typical 398 * size to avoid keeping around a very large table just because of a few large transient maps. 399 * 400 * @param n the threshold for the trimming. 401 * @return true if there was enough memory to trim the map. 402 * @see #trim() 403 */ 404 public boolean trim(final int n) { 405 final int l = HashCommon.nextPowerOfTwo((int) Math.ceil(n / f)); 406 if (l >= n || size > maxFill(l, f)) return true; 407 try { 408 rehash(l); 409 } catch (Error cantDoIt) { 410 return false; 411 } 412 return true; 413 } 414 415 /** 416 * Rehashes the map. 417 * <p> 418 * <P>This method implements the basic rehashing strategy, and may be overriden by subclasses implementing different rehashing strategies (e.g., disk-based rehashing). However, you should not 419 * override this method unless you understand the internal workings of this class. 420 * 421 * @param newN the new size 422 */ 423 424 protected void rehash(final int newN) { 425 final char key[] = this.key; 426 final char value[] = this.value; 427 final int mask = newN - 1; // Note that this is used by the hashing macro 428 final char newKey[] = new char[newN + 1]; 429 final char newValue[] = new char[newN + 1]; 430 int i = n, pos; 431 for (int j = realSize(); j-- != 0; ) { 432 while (((key[--i]) == ((char) 0))) ; 433 if (!((newKey[pos = (HashCommon.mix((key[i]))) & mask]) == ((char) 0))) 434 while (!((newKey[pos = (pos + 1) & mask]) == ((char) 0))) ; 435 newKey[pos] = key[i]; 436 newValue[pos] = value[i]; 437 } 438 newValue[newN] = value[n]; 439 n = newN; 440 this.mask = mask; 441 maxFill = maxFill(n, f); 442 this.key = newKey; 443 this.value = newValue; 444 } 445 446 /** 447 * Returns a deep copy of this map. 448 * <p> 449 * This method performs a deep copy of this hash map, but with primitive keys and values it doesn't matter much. 450 * @return a deep copy of this map. 451 */ 452 453 public CharCharMap clone() { 454 char[] k = new char[key.length], v = new char[value.length]; 455 System.arraycopy(key, 0, k, 0, key.length); 456 System.arraycopy(value, 0, v, 0, value.length); 457 return new CharCharMap(k, v, f); 458 } 459 460 /** 461 * Returns a hash code for this map. 462 * <p> 463 * This method overrides the generic method provided by the superclass. Since <code>equals()</code> is not overriden, it is important that the value returned by this method is the same value as 464 * the one returned by the overriden method. 465 * 466 * @return a hash code for this map. 467 */ 468 public int hashCode() { 469 int h = 0; 470 for (int j = realSize(), i = 0, t = 0; j-- != 0; ) { 471 while (((key[i]) == ((char) 0))) 472 i++; 473 t = (key[i]); 474 t ^= (value[i]); 475 h += t; 476 i++; 477 } 478 // Zero / null keys have hash zero. 479 if (containsNullKey) h += (value[n]); 480 return h; 481 } 482 483 /** 484 * Returns the maximum number of entries that can be filled before rehashing. 485 * 486 * @param n the size of the backing array. 487 * @param f the load factor. 488 * @return the maximum number of entries before rehashing. 489 */ 490 public static int maxFill(final int n, final float f) { 491 /* We must guarantee that there is always at least 492 * one free entry (even with pathological load factors). */ 493 return Math.min((int) Math.ceil(n * f), n - 1); 494 } 495 496 /** 497 * Returns the maximum number of entries that can be filled before rehashing. 498 * 499 * @param n the size of the backing array. 500 * @param f the load factor. 501 * @return the maximum number of entries before rehashing. 502 */ 503 public static long maxFill(final long n, final float f) { 504 /* We must guarantee that there is always at least 505 * one free entry (even with pathological load factors). */ 506 return Math.min((long) Math.ceil(n * f), n - 1); 507 } 508 509 /** 510 * Returns the least power of two smaller than or equal to 2<sup>30</sup> and larger than or equal to <code>Math.ceil( expected / f )</code>. 511 * 512 * @param expected the expected number of elements in a hash table. 513 * @param f the load factor. 514 * @return the minimum possible size for a backing array. 515 * @throws IllegalArgumentException if the necessary size is larger than 2<sup>30</sup>. 516 */ 517 public static int arraySize(final int expected, final float f) { 518 final long s = Math.max(2, HashCommon.nextPowerOfTwo((long) Math.ceil(expected / f))); 519 if (s > (1 << 30)) 520 throw new IllegalArgumentException("Too large (" + expected + " expected elements with load factor " + f + ")"); 521 return (int) s; 522 } 523 524 private static class HashCommon { 525 526 private HashCommon() { 527 } 528 529 ; 530 531 /** 532 * This reference is used to fill keys and values of removed entries (if 533 * they are objects). <code>null</code> cannot be used as it would confuse the 534 * search algorithm in the presence of an actual <code>null</code> key. 535 */ 536 public static final Object REMOVED = new Object(); 537 538 /** 539 * 2<sup>32</sup> · φ, φ = (√5 − 1)/2. 540 */ 541 private static final int INT_PHI = 0x9E3779B9; 542 /** 543 * The reciprocal of {@link #INT_PHI} modulo 2<sup>32</sup>. 544 */ 545 private static final int INV_INT_PHI = 0x144cbc89; 546 /** 547 * 2<sup>64</sup> · φ, φ = (√5 − 1)/2. 548 */ 549 private static final long LONG_PHI = 0x9E3779B97F4A7C15L; 550 /** 551 * The reciprocal of {@link #LONG_PHI} modulo 2<sup>64</sup>. 552 */ 553 private static final long INV_LONG_PHI = 0xf1de83e19937733dL; 554 555 /** 556 * Avalanches the bits of an integer by applying the finalisation step of MurmurHash3. 557 * <p> 558 * <p>This method implements the finalisation step of Austin Appleby's <a href="http://code.google.com/p/smhasher/">MurmurHash3</a>. 559 * Its purpose is to avalanche the bits of the argument to within 0.25% bias. 560 * 561 * @param x an integer. 562 * @return a hash value with good avalanching properties. 563 */ 564 public final static int murmurHash3(int x) { 565 x ^= x >>> 16; 566 x *= 0x85ebca6b; 567 x ^= x >>> 13; 568 x *= 0xc2b2ae35; 569 x ^= x >>> 16; 570 return x; 571 } 572 573 574 /** 575 * Avalanches the bits of a long integer by applying the finalisation step of MurmurHash3. 576 * <p> 577 * <p>This method implements the finalisation step of Austin Appleby's <a href="http://code.google.com/p/smhasher/">MurmurHash3</a>. 578 * Its purpose is to avalanche the bits of the argument to within 0.25% bias. 579 * 580 * @param x a long integer. 581 * @return a hash value with good avalanching properties. 582 */ 583 public final static long murmurHash3(long x) { 584 x ^= x >>> 33; 585 x *= 0xff51afd7ed558ccdL; 586 x ^= x >>> 33; 587 x *= 0xc4ceb9fe1a85ec53L; 588 x ^= x >>> 33; 589 return x; 590 } 591 592 /** 593 * Quickly mixes the bits of an integer. 594 * <p> 595 * <p>This method mixes the bits of the argument by multiplying by the golden ratio and 596 * xorshifting the result. It is borrowed from <a href="https://github.com/OpenHFT/Koloboke">Koloboke</a>, and 597 * it has slightly worse behaviour than {@link #murmurHash3(int)} (in open-addressing hash tables the average number of probes 598 * is slightly larger), but it's much faster. 599 * 600 * @param x an integer. 601 * @return a hash value obtained by mixing the bits of {@code x}. 602 * @see #invMix(int) 603 */ 604 public final static int mix(final int x) { 605 final int h = x * INT_PHI; 606 return h ^ (h >>> 16); 607 } 608 609 /** 610 * The inverse of {@link #mix(int)}. This method is mainly useful to create unit tests. 611 * 612 * @param x an integer. 613 * @return a value that passed through {@link #mix(int)} would give {@code x}. 614 */ 615 public final static int invMix(final int x) { 616 return (x ^ x >>> 16) * INV_INT_PHI; 617 } 618 619 /** 620 * Quickly mixes the bits of a long integer. 621 * <p> 622 * <p>This method mixes the bits of the argument by multiplying by the golden ratio and 623 * xorshifting twice the result. It is borrowed from <a href="https://github.com/OpenHFT/Koloboke">Koloboke</a>, and 624 * it has slightly worse behaviour than {@link #murmurHash3(long)} (in open-addressing hash tables the average number of probes 625 * is slightly larger), but it's much faster. 626 * 627 * @param x a long integer. 628 * @return a hash value obtained by mixing the bits of {@code x}. 629 */ 630 public final static long mix(final long x) { 631 long h = x * LONG_PHI; 632 h ^= h >>> 32; 633 return h ^ (h >>> 16); 634 } 635 636 /** 637 * The inverse of {@link #mix(long)}. This method is mainly useful to create unit tests. 638 * 639 * @param x a long integer. 640 * @return a value that passed through {@link #mix(long)} would give {@code x}. 641 */ 642 public final static long invMix(long x) { 643 x ^= x >>> 32; 644 x ^= x >>> 16; 645 return (x ^ x >>> 32) * INV_LONG_PHI; 646 } 647 648 649 /** 650 * Returns the hash code that would be returned by {@link Float#hashCode()}. 651 * 652 * @param f a float. 653 * @return the same code as {@link Float#hashCode() new Float(f).hashCode()}. 654 */ 655 656 final public static int float2int(final float f) { 657 return Float.floatToIntBits(f); 658 } 659 660 /** 661 * Returns the hash code that would be returned by {@link Double#hashCode()}. 662 * 663 * @param d a double. 664 * @return the same code as {@link Double#hashCode() new Double(f).hashCode()}. 665 */ 666 667 final public static int double2int(final double d) { 668 final long l = Double.doubleToLongBits(d); 669 return (int) (l ^ (l >>> 32)); 670 } 671 672 /** 673 * Returns the hash code that would be returned by {@link Long#hashCode()}. 674 * 675 * @param l a long. 676 * @return the same code as {@link Long#hashCode() new Long(f).hashCode()}. 677 */ 678 final public static int long2int(final long l) { 679 return (int) (l ^ (l >>> 32)); 680 } 681 682 /** 683 * Return the least power of two greater than or equal to the specified value. 684 * <p> 685 * <p>Note that this function will return 1 when the argument is 0. 686 * 687 * @param x an integer smaller than or equal to 2<sup>30</sup>. 688 * @return the least power of two greater than or equal to the specified value. 689 */ 690 public static int nextPowerOfTwo(int x) { 691 if (x == 0) return 1; 692 x--; 693 x |= x >> 1; 694 x |= x >> 2; 695 x |= x >> 4; 696 x |= x >> 8; 697 return (x | x >> 16) + 1; 698 } 699 700 /** 701 * Return the least power of two greater than or equal to the specified value. 702 * <p> 703 * <p>Note that this function will return 1 when the argument is 0. 704 * 705 * @param x a long integer smaller than or equal to 2<sup>62</sup>. 706 * @return the least power of two greater than or equal to the specified value. 707 */ 708 public static long nextPowerOfTwo(long x) { 709 if (x == 0) return 1; 710 x--; 711 x |= x >> 1; 712 x |= x >> 2; 713 x |= x >> 4; 714 x |= x >> 8; 715 x |= x >> 16; 716 return (x | x >> 32) + 1; 717 } 718 719 /** 720 * Returns the least power of two larger than or equal to <code>Math.ceil( expected / f )</code>. 721 * 722 * @param expected the expected number of elements in a hash table. 723 * @param f the load factor. 724 * @return the minimum possible size for a backing big array. 725 */ 726 public static long bigArraySize(final long expected, final float f) { 727 return nextPowerOfTwo((long) Math.ceil(expected / f)); 728 } 729 } 730}