101# ifndef BLOOM_FILTER_H
102# define BLOOM_FILTER_H
112# include <gsl/gsl_rng.h>
123 template <
typename T>
170 static std::tuple<size_t, size_t>
estimate(
const size_t n,
const double p)
174 const double log2 = std::log(2);
175 const auto m =
static_cast<size_t>(std::ceil(-(n * std::log(p)) / (
log2 *
log2)));
176 const auto k =
static_cast<size_t>(std::ceil((1.0 * m / n) *
log2));
177 return std::make_tuple(m,
k);
209 for (
size_t i = 0; i <
bits.
size(); ++i)
247 const unsigned long seed =
static_cast<unsigned long>(std::time(
nullptr)))
261 for (
auto & s:
seeds)
276 unsigned long seed =
static_cast<unsigned long>(std::time(
nullptr)),
356 for (
const auto & seed:
seeds)
380 for (
const auto & seed:
seeds)
395 for (
const auto & seed:
seeds)
409 for (
const auto & seed:
seeds)
434 for (
size_t i = 0; i <
bits.
size(); ++i)
452 const auto m =
static_cast<double>(
capacity());
453 const auto k =
static_cast<double>(
get_k());
454 if (m <= 0
or k <= 0)
457 const double p0 = 1.0 - (
static_cast<double>(x) / m);
459 return std::numeric_limits<size_t>::max();
461 const double n = std::ceil(-(m * std::log(
p0)) /
k);
462 if (
not std::isfinite(n)
or n < 0)
463 return std::numeric_limits<size_t>::max();
465 return static_cast<size_t>(n);
487 <<
"Bloom filter have different hashes";
491 const auto x =
get_x();
506 <<
"Bloom filter have different hashes";
Exception handling system with formatted messages for Aleph-w.
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
#define ah_invalid_argument_if(C)
Throws std::invalid_argument if condition holds.
#define ah_bad_alloc_if(C)
Throws std::bad_alloc if condition holds.
Functional programming utilities for Aleph-w containers.
Space-efficient bit array implementation.
Contiguous array of bits.
void fast_write(const size_t i, const unsigned int value)
int fast_read(const size_t i) const noexcept
void reserve(const size_t dim)
Reserve memory in advance for the bit array dim dimension.
constexpr size_t size() const noexcept
Returns the dimension of the bit array.
void swap(BitArray &array) noexcept
Bloom filter implementation.
friend Bloom_Filter operator|(const Bloom_Filter &f1, const Bloom_Filter &f2)
Return the union of two compatible Bloom filters.
size_t size() const noexcept
Return the number of insertions.
auto & operator&=(const Bloom_Filter &f)
In-place intersection.
DynList< size_t > common_hashes(const T &i1, const T &i2) const
Return the intersection of hashes(i1) and hashes(i2).
Bloom_Filter(const size_t n, const double p, unsigned long seed=static_cast< unsigned long >(std::time(nullptr)), Hash_Fct __hash_fct=dft_hash_fct)
Construct a Bloom filter using a desired capacity in insertions.
friend Bloom_Filter operator&(const Bloom_Filter &f1, const Bloom_Filter &f2)
Return the intersection of two compatible Bloom filters.
size_t expected_size() const noexcept
Convenience overload.
bool have_same_hashes(const Bloom_Filter &f) const noexcept
static std::tuple< size_t, size_t > estimate(const size_t n, const double p)
Estimate the parameters of a Bloom filter.
size_t expected_size(const size_t x) const noexcept
Estimate the number of inserted items from the number of bits set.
std::vector< unsigned long > seeds
size_t get_n() const noexcept
Return the number of insertions performed on the filter.
DynList< size_t > set_bits() const
Return the indexes of bits set to 1.
bool contains(const T &item) const noexcept
Test membership.
size_t get_m() const noexcept
Return the number of bits of the filter.
auto & operator|=(const Bloom_Filter &f)
In-place union.
size_t(*)(const T &, unsigned long seed) Hash_Fct
Bloom_Filter(const size_t dim, size_t __num_hash, Hash_Fct __hash_fct=dft_hash_fct, const unsigned long seed=static_cast< unsigned long >(std::time(nullptr)))
Construct a Bloom filter with explicit parameters.
DynList< size_t > hash_seeds() const
Return the internally generated per-hash seeds.
virtual ~Bloom_Filter()=default
Destructor.
Bloom_Filter(const Bloom_Filter &f)
Copy constructor.
Bloom_Filter(Bloom_Filter &&f) noexcept
Move constructor.
size_t capacity() const noexcept
Return the capacity in bits.
DynList< size_t > hashes(const T &item) const
Return the bit positions used by item.
void swap(Bloom_Filter &f) noexcept
Swap this with f in O(1).
size_t get_k() const noexcept
Return the number of hash functions used by the filter.
size_t get_x() const noexcept
Return the number of bits set to 1.
auto & operator=(const Bloom_Filter &f)
Copy assignment.
auto & insert(const T &item) noexcept
Insert an item.
auto & append(const T &item) noexcept
Alias of insert().
Dynamic singly linked list with functional programming support.
T & append(const T &item)
Append a new item by copy.
__gmp_expr< T, __gmp_unary_expr< __gmp_expr< T, U >, __gmp_log2_function > > log2(const __gmp_expr< T, U > &expr)
__gmp_expr< typename __gmp_resolve_expr< T, V >::value_type, __gmp_binary_expr< __gmp_expr< T, U >, __gmp_expr< V, W >, __gmp_dim_function > > dim(const __gmp_expr< T, U > &expr1, const __gmp_expr< V, W > &expr2)
Collection of general-purpose hash functions.
Main namespace for Aleph-w library functions.
DynList< T > intercept(const Container< T > &c1, const Container< T > &c2)
std::decay_t< typename HeadC::Item_Type > T
size_t dft_hash_fct(const Key &key) noexcept
DynList< T > maps(const C &c, Op op)
Classic map operation.
T sum(const Container &container, const T &init=T{})
Compute sum of all elements.
Dynamic set implementations based on hash tables.