31# ifndef COUNT_MIN_SKETCH_H
32# define COUNT_MIN_SKETCH_H
69 template <
typename T, std::
unsigned_
integral CounterT =
size_t>
87 <<
"Count_Min_Sketch: dimensions must be positive";
90 <<
"Count_Min_Sketch: product of dimensions w*d overflows size_t (got "
91 <<
w <<
" * " << d <<
")";
96 for (
size_t i = 0; i <
d_; ++i)
113 <<
"Count_Min_Sketch::from_error_bounds: epsilon must be positive and finite (got " << epsilon <<
")";
115 <<
"Count_Min_Sketch::from_error_bounds: delta must be in (0, 1) and finite (got " << delta <<
")";
117 const double w_dbl = std::ceil(std::exp(1.0) / epsilon);
118 const double d_dbl = std::ceil(std::log(1.0 / delta));
119 const double max_sz =
static_cast<double>(std::numeric_limits<size_t>::max());
122 <<
"Count_Min_Sketch::from_error_bounds: calculated dimensions exceed size_t limits";
133 if (
count == 0)
return;
135 constexpr CounterT max_val = std::numeric_limits<CounterT>::max();
137 for (
size_t i = 0; i <
d_; ++i)
140 const size_t idx = i *
w_ +
h;
158 CounterT min_val = std::numeric_limits<CounterT>::max();
159 for (
size_t i = 0; i <
d_; ++i)
162 min_val = std::min(min_val,
table_[i *
w_ +
h]);
173 <<
"Count_Min_Sketch::merge: dimension mismatch";
175 for (
size_t i = 0; i <
d_; ++i)
177 <<
"Count_Min_Sketch::merge: hash seed mismatch at index " << i;
179 constexpr CounterT max_val = std::numeric_limits<CounterT>::max();
181 for (
size_t i = 0; i <
table_.size(); ++i)
198 for (
size_t i = 0; i <
table_.size(); ++i)
Exception handling system with formatted messages for Aleph-w.
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Simple dynamic array with automatic resizing and functional operations.
T & append(const T &data)
Append a copy of data
void reserve(size_t cap)
Reserves cap cells into the array.
Count-Min Sketch frequency estimator.
void merge(const Count_Min_Sketch &other)
Merge another sketch into this one.
size_t w_
Width of the sketch.
size_t d_
Depth (number of hash functions).
size_t depth() const noexcept
Number of hash functions (independent counter rows).
Array< CounterT > table_
Flat array for counters [d_ x w_].
CounterT estimate(const T &val) const
Estimate frequency of an element.
CounterT total_count() const noexcept
Sum of all event weights inserted so far.
void clear()
Clear all counters.
static Count_Min_Sketch from_error_bounds(const double epsilon, const double delta)
Create a sketch with target error bounds.
void update(const T &val, CounterT count=1)
Increment count for an element.
size_t width() const noexcept
Number of counter columns (hash buckets per row).
CounterT total_count_
Total updates processed.
Count_Min_Sketch(const size_t w, const size_t d)
Construct with explicit dimensions.
Array< uint32_t > seeds_
Random seeds for hash functions.
Main namespace for Aleph-w library functions.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
std::decay_t< typename HeadC::Item_Type > T
size_t dft_hash_fct(const Key &key) noexcept
Primary default hash: best speed/quality trade-off.
size_t murmur3hash(const Key &key, std::uint32_t seed)
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Dynamic array container with automatic resizing.