public class HyperLogLog extends Object implements NumDistinctValueEstimator
This is an implementation of the following variants of the HyperLogLog (HLL)
algorithm:
Original - Original HLL algorithm from Flajolet et al. from
http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
HLLNoBias - Google's implementation of bias correction based on lookup table
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
HLL++ - Google's implementation of HLL++ algorithm that uses SPARSE registers
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
The following constructor parameters determine which algorithm is
used:
numRegisterIndexBits - number of LSB hashcode bits to be used as register index.
Default is 14. min = 4 and max = 16
numHashBits - number of bits for hashcode. Default is 64. min = 32 and max = 128
encoding - Type of encoding to use (SPARSE or DENSE). The algorithm automatically
switches to DENSE beyond a threshold. Default: SPARSE
enableBitPacking - Whether to enable bit packing. Bit packing improves compression
at the cost of more CPU cycles. Default: true
noBias - Use Google's bias table lookup for short range bias correction.
Enabling this greatly improves the estimation accuracy for short
range values. Default: true
| Modifier and Type | Class and Description |
|---|---|
static class |
HyperLogLog.EncodingType |
static class |
HyperLogLog.HyperLogLogBuilder |
LOG
| Modifier and Type | Method and Description |
|---|---|
void |
add(long hashcode) |
void |
addBoolean(boolean val) |
void |
addByte(byte val) |
void |
addBytes(byte[] val) |
void |
addBytes(byte[] value,
int offset,
int length) |
void |
addChar(char val) |
void |
addDouble(double val) |
void |
addFloat(float val) |
void |
addInt(int val) |
void |
addLong(long val) |
void |
addShort(short val) |
void |
addString(String val)
Java's default charset will be used for strings.
|
void |
addString(String val,
Charset charset) |
void |
addToEstimator(byte[] value,
int offset,
int length) |
void |
addToEstimator(double d) |
void |
addToEstimator(HiveDecimal decimal) |
void |
addToEstimator(long v) |
void |
addToEstimator(String s) |
static HyperLogLog.HyperLogLogBuilder |
builder() |
boolean |
canMerge(NumDistinctValueEstimator o) |
long |
count() |
NumDistinctValueEstimator |
deserialize(byte[] buf) |
boolean |
equals(Object obj) |
long |
estimateNumDistinctValues() |
HyperLogLog.EncodingType |
getEncoding() |
int |
getEncodingSwitchThreshold() |
HLLDenseRegister |
getHLLDenseRegister() |
HLLSparseRegister |
getHLLSparseRegister() |
int |
getNumRegisterIndexBits() |
double |
getStandardError() |
int |
hashCode() |
int |
lengthFor(JavaDataModel model) |
void |
merge(HyperLogLog hll)
Merge the specified hyperloglog to the current one.
|
void |
mergeEstimators(NumDistinctValueEstimator o) |
void |
reset() |
byte[] |
serialize() |
void |
setCount(long count) |
void |
setEncoding(HyperLogLog.EncodingType encoding) |
void |
setHLLDenseRegister(byte[] reg)
Reconstruct dense registers from byte array
|
void |
setHLLSparseRegister(int[] reg)
Reconstruct sparse map from serialized integer list
|
HyperLogLog |
squash(int p0)
Reduces the accuracy of the HLL provided to a smaller size
|
String |
toString() |
String |
toStringExtended() |
public static HyperLogLog.HyperLogLogBuilder builder()
public void addBoolean(boolean val)
public void addByte(byte val)
public void addBytes(byte[] val)
public void addShort(short val)
public void addInt(int val)
public void addLong(long val)
public void addFloat(float val)
public void addDouble(double val)
public void addChar(char val)
public void addString(String val)
val - input string
public void addBytes(byte[] value,
int offset,
int length)
public void add(long hashcode)
public long estimateNumDistinctValues()
estimateNumDistinctValues in interface NumDistinctValueEstimator
public long count()
public void setCount(long count)
public double getStandardError()
public HLLDenseRegister getHLLDenseRegister()
public HLLSparseRegister getHLLSparseRegister()
public void setHLLSparseRegister(int[] reg)
reg - uncompressed and delta decoded integer list
public void setHLLDenseRegister(byte[] reg)
reg - unpacked byte array
public void merge(HyperLogLog hll)
hll - hyperloglog to be merged
Throws: IllegalArgumentException
public HyperLogLog squash(int p0)
p0 - new p size for the new HyperLogLog (smaller or no change)
public String toStringExtended()
public int getNumRegisterIndexBits()
public HyperLogLog.EncodingType getEncoding()
public void setEncoding(HyperLogLog.EncodingType encoding)
public void reset()
reset in interface NumDistinctValueEstimator
public byte[] serialize()
serialize in interface NumDistinctValueEstimator
public NumDistinctValueEstimator deserialize(byte[] buf)
deserialize in interface NumDistinctValueEstimator
public void addToEstimator(long v)
addToEstimator in interface NumDistinctValueEstimator
public void addToEstimator(String s)
addToEstimator in interface NumDistinctValueEstimator
public void addToEstimator(double d)
addToEstimator in interface NumDistinctValueEstimator
public void addToEstimator(byte[] value,
int offset,
int length)
addToEstimator in interface NumDistinctValueEstimator
public void addToEstimator(HiveDecimal decimal)
addToEstimator in interface NumDistinctValueEstimator
public void mergeEstimators(NumDistinctValueEstimator o)
mergeEstimators in interface NumDistinctValueEstimator
public int lengthFor(JavaDataModel model)
lengthFor in interface NumDistinctValueEstimator
public boolean canMerge(NumDistinctValueEstimator o)
canMerge in interface NumDistinctValueEstimator
public int getEncodingSwitchThreshold()
Copyright © 2024 The Apache Software Foundation. All rights reserved.