80 static_assert(
sizeof(size_t) >= 8,
81 "HyperLogLog requires 64-bit size_t; murmur3hash is 32-bit on this platform");
91 <<
"HyperLogLog: b must be in range [4, 16] (got " <<
static_cast<int>(b) <<
")";
92 return static_cast<size_t>(1) << b;
100 if (
m == 16)
return 0.673;
101 if (
m == 32)
return 0.697;
102 if (
m == 64)
return 0.709;
103 return 0.7213 / (1.0 + 1.079 /
static_cast<double>(
m));
129 const size_t j = x >> (64 -
b_);
150 for (
size_t i = 0; i <
m_; ++i)
153 z += std::ldexp(1.0, -
static_cast<int>(
registers_[i]));
159 double e =
alpha_m_ * (
static_cast<double>(
m_) *
static_cast<double>(
m_)) / z;
162 if (e <= 2.5 *
static_cast<double>(
m_))
165 e =
static_cast<double>(
m_) * std::log(
static_cast<double>(
m_) /
static_cast<double>(v));
170 static const double two_to_64 = std::ldexp(1.0, 64);
198 for (
size_t i = 0; i <
m_; ++i)
210 for (
size_t i = 0; i <
m_; ++i)
Exception handling system with formatted messages for Aleph-w.
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Simple dynamic array with automatic resizing and functional operations.
HyperLogLog cardinality estimator.
double estimate() const noexcept
Estimate current cardinality.
Array< uint8_t > registers_
Max leading zeros observed per bucket.
HyperLogLog(const uint8_t b=12)
Construct with precision parameter.
static size_t compute_m_or_throw(const uint8_t b)
Validate b and compute m = 2^b.
void clear()
Reset all registers to zero.
double alpha_m_
Bias correction constant.
static uint64_t hash64(const T &val) noexcept
Compute 64-bit hash using MurmurHash3.
static double compute_alpha(const size_t m) noexcept
Compute bias-correction constant alpha_m from Flajolet et al.
void merge(const HyperLogLog &other)
Merge another HyperLogLog into this one (union of sets).
void update(const T &val)
Add an element to the set.
size_t m_
Number of registers (2^b_).
size_t num_registers() const noexcept
Number of HyperLogLog registers (2^b).
uint8_t b_
Number of bits for bucket index.
Main namespace for Aleph-w library functions.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
std::decay_t< typename HeadC::Item_Type > T
size_t murmur3hash(const Key &key, std::uint32_t seed)
FooMap m(5, fst_unit_pair_hash, snd_unit_pair_hash)
Dynamic array container with automatic resizing.