31# ifndef RESERVOIR_SAMPLING_H
32# define RESERVOIR_SAMPLING_H
50# include <gsl/gsl_rng.h>
94 const unsigned long seed =
static_cast<unsigned long>(
120 <<
"Reservoir_Sampler::update: stream length exceeded size_t capacity";
134 <<
"Reservoir_Sampler::update: stream length exceeds unsigned long capacity";
137 <<
"Reservoir_Sampler::update: stream length exceeds RNG generator range (" <<
rng_range_ <<
")";
157 template <std::input_iterator Itor, std::sentinel_for<Itor> Sent>
205 <<
"set_n_seen_for_testing: inconsistent state: "
206 <<
"n=" << n <<
", k=" <<
k_
208 <<
" (expected " <<
expected <<
")";
243 template <std::input_iterator Itor, std::sentinel_for<Itor> Sent>
246 unsigned long seed =
static_cast<unsigned long>(
249 using T =
typename std::iterator_traits<Itor>::value_type;
Exception handling system with formatted messages for Aleph-w.
#define ah_runtime_error_unless(C)
Throws std::runtime_error if condition does NOT hold.
#define ah_overflow_error_if(C)
Throws std::overflow_error if condition holds.
#define ah_invalid_argument_if(C)
Throws std::invalid_argument if condition holds.
Simple dynamic array with automatic resizing and functional operations.
Incremental reservoir sampler (Algorithm R).
size_t sample_size() const noexcept
Target sample size.
uint64_t rng_range_
Cached range of the RNG (max - min + 1).
void update(const T &val)
Process a new element from the stream.
size_t n_seen_
Total elements seen so far.
void clear()
Reset the sampler while keeping k and seed.
const Array< T > & get_sample() const noexcept
Returns the current random sample.
Reservoir_Sampler(const size_t k, const unsigned long seed=static_cast< unsigned long >(std::chrono::high_resolution_clock::now().time_since_epoch().count()))
Initialize sampler.
void update(Itor beg, Sent end)
Process multiple elements.
uint64_t rng_min_
Minimum value produced by RNG.
uint64_t rng_range() const noexcept
RNG output range (max - min + 1).
GslRngPtr rng_
Random number generator.
Array< T > reservoir_
Current random sample.
uint64_t rng_max_
Maximum value produced by RNG.
void set_n_seen_for_testing(const size_t n)
Forcibly set the internal stream counter (for testing only).
std::unique_ptr< gsl_rng, void(*)(gsl_rng *)> GslRngPtr
size_t total_seen() const noexcept
Total elements processed so far.
unsigned long seed_
Stored seed for RNG reset.
Main namespace for Aleph-w library functions.
auto reservoir_sample(Itor beg, Sent end, size_t k, unsigned long seed=static_cast< unsigned long >(std::chrono::high_resolution_clock::now().time_since_epoch().count()))
Convenience function to sample k elements from a range.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
std::decay_t< typename HeadC::Item_Type > T
Itor upper_bound(Itor beg, Itor end, const T &value)
Find upper bound in a sorted range.
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Dynamic array container with automatic resizing.