81 for (
size_t i = 0; i <
k; ++i)
86 for (
size_t i = 0; i <
k_; ++i)
97 for (
size_t i = 0; i <
k_; ++i)
112 template <std::input_iterator Itor>
130 <<
"MinHash::similarity: signature size mismatch";
133 for (
size_t i = 0; i <
k_; ++i)
137 return static_cast<double>(
matches) /
static_cast<double>(
k_);
156 constexpr uint64_t inf = std::numeric_limits<uint64_t>::max();
157 for (
size_t i = 0; i <
k_; ++i)
173 <<
"MinHash::merge: signature size mismatch ("
175 for (
size_t i = 0; i <
k_; ++i)
Exception handling system with formatted messages for Aleph-w.
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Simple dynamic array with automatic resizing and functional operations.
T & append(const T &data)
Append a copy of data.
void reserve(size_t cap)
Reserves cap cells in the array.
MinHash signature generator.
MinHash & merge(const MinHash &other)
Merge another MinHash signature into this one.
size_t size() const noexcept
Number of hash functions used in the signature.
void clear()
Reset the signature to the initial all-maximum state.
void update(const T &val)
Add an element to the set.
double similarity(const MinHash &other) const
Estimate Jaccard similarity with another signature.
MinHash(size_t k=128)
Construct with signature size.
const Array< uint64_t > & get_signature() const noexcept
Returns the current signature.
Array< uint64_t > signature_
Minimized hash values.
Array< uint32_t > seeds_
Random seeds for hash functions.
void update(Itor beg, const Itor &end)
Add all elements in a range to the MinHash signature.
Main namespace for Aleph-w library functions.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
std::decay_t< typename HeadC::Item_Type > T
size_t dft_hash_fct(const Key &key) noexcept
Primary default hash: best speed/quality trade-off.
size_t murmur3hash(const Key &key, std::uint32_t seed)
Dynamic array container with automatic resizing.