36#include <tclap/CmdLine.h>
45 cout <<
"\n" << string(60,
'=') <<
"\n";
46 cout <<
" " << title <<
"\n";
47 cout << string(60,
'=') <<
"\n\n";
52 cout <<
"\n--- " << title <<
" ---\n";
63 cout <<
"A quotient filter tests set membership probabilistically.\n";
64 cout <<
"Like Bloom filters, but also supports DELETION.\n\n";
69 cout <<
"Created quotient filter:\n";
70 cout <<
" Slots (2^q): " <<
qf.capacity() <<
"\n";
71 cout <<
" Remainder bits (r): " <<
static_cast<int>(
qf.r()) <<
"\n";
72 cout <<
" FP rate ≈ 2^{-r}: " << fixed << setprecision(4)
73 << (
qf.false_positive_rate() * 100) <<
"%\n";
74 cout <<
" Memory: " <<
qf.memory_usage() <<
" bytes\n\n";
77 vector<string> words = {
"cafe",
"arepa",
"empanada",
"bandeja",
"sancocho"};
78 for (
const auto &
w : words)
81 cout <<
" Inserted: \"" <<
w <<
"\"\n";
83 cout <<
" Size: " <<
qf.size() <<
"\n";
87 for (
const auto &
w :
tests)
89 bool found =
qf.contains(
w);
90 cout <<
" \"" <<
w <<
"\": "
91 << (found ?
"PROBABLY present" :
"DEFINITELY absent") <<
"\n";
94 print_sub(
"Deletion (Bloom filters can't do this!)");
95 cout <<
" Removing \"arepa\"...\n";
97 cout <<
" contains(\"arepa\"): "
98 << (
qf.contains(
"arepa") ?
"true" :
"false") <<
"\n";
99 cout <<
" Size after removal: " <<
qf.size() <<
"\n";
101 cout <<
"\n Removing \"sancocho\"...\n";
102 qf.remove(
"sancocho");
103 cout <<
" contains(\"sancocho\"): "
104 << (
qf.contains(
"sancocho") ?
"true" :
"false") <<
"\n";
106 cout <<
"\n Remaining elements still present:\n";
107 for (
const auto &
w : words)
109 cout <<
" \"" <<
w <<
"\"\n";
120 cout <<
"FP rate ≈ 2^{-r} where r is the remainder bits.\n\n";
124 cout << setw(5) <<
"r" << setw(12) <<
"Theor FP%"
125 << setw(15) <<
"Observed FP%" << setw(12) <<
"Tested\n";
126 cout << string(44,
'-') <<
"\n";
128 for (
int r_val : {4, 6, 8, 10, 12})
132 for (
size_t i = 0; i < n; ++i)
133 qf.insert(
static_cast<int>(i));
137 for (
size_t i = n + 50000; i < n + 50000 +
test_count; ++i)
138 if (
qf.contains(
static_cast<int>(i)))
141 double theor = 100.0 *
qf.false_positive_rate();
144 cout << setw(5) <<
r_val
145 << setw(12) << fixed << setprecision(4) <<
theor
159 cout <<
"Feature comparison:\n\n";
161 cout << setw(25) <<
"Feature"
162 << setw(20) <<
"Bloom Filter"
163 << setw(20) <<
"Quotient Filter" <<
"\n";
164 cout << string(65,
'-') <<
"\n";
166 auto row = [](
const string &
feat,
const string &
bloom,
const string &
qf)
168 cout << setw(25) <<
feat
170 << setw(20) <<
qf <<
"\n";
173 row(
"Deletion",
"No",
"Yes");
174 row(
"False negatives",
"Never",
"Never");
175 row(
"False positives",
"Yes",
"Yes");
176 row(
"Cache-friendly",
"Poor",
"Excellent");
177 row(
"Mergeable",
"Yes (OR)",
"Yes (union)");
178 row(
"Space per element",
"~10 bits/1%",
"(r+3) bits");
179 row(
"Resize",
"Rebuild",
"Rebuild");
181 cout <<
"\nQuotient filter with r=8: FP ≈ 0.4%, 11 bits/element.\n";
182 cout <<
"Bloom filter at 0.4%: ~11 bits/element, ~8 hash functions.\n";
183 cout <<
"→ Similar space, but QF supports deletion and is cache-friendly.\n";
194 cout <<
"Two filters with the same (q, r, seed) can be merged.\n\n";
200 for (
int i = 0; i < 100; ++i)
202 for (
int i = 100; i < 200; ++i)
205 cout <<
"Filter A: " << a.
size() <<
" elements (0..99)\n";
206 cout <<
"Filter B: " << b.
size() <<
" elements (100..199)\n";
209 cout <<
"\nAfter merge: " << a.
size() <<
" elements\n";
212 for (
int i = 0; i < 200; ++i)
216 cout <<
"Elements 0..199 found: " << found <<
"/200\n";
227 cout <<
"Use a quotient filter as a dedup cache for packet IDs.\n";
228 cout <<
"Old entries can be deleted (unlike Bloom filters).\n\n";
232 cout <<
"Filter for ~10000 packets at 1% FP:\n";
233 cout <<
" q=" <<
static_cast<int>(
qf.q())
234 <<
" r=" <<
static_cast<int>(
qf.r())
235 <<
" capacity=" <<
qf.capacity()
236 <<
" memory=" <<
qf.memory_usage() / 1024 <<
" KB\n\n";
245 cout <<
"Inserted window [0, 5000): size=" <<
qf.size()
246 <<
" load=" << fixed << setprecision(2)
247 << (
qf.load_factor() * 100) <<
"%\n";
256 cout <<
"Slid window to [2000, 7000): size=" <<
qf.size()
257 <<
" load=" << fixed << setprecision(2)
258 << (
qf.load_factor() * 100) <<
"%\n";
270 cout <<
"\nDedup check for 500 IDs in window:\n";
271 cout <<
" Caught as duplicates: " <<
dupes_caught <<
"/500\n";
272 cout <<
" Missed (false neg): " <<
dupes_missed <<
"/500\n";
284 "Quotient filter example for Aleph-w.\n"
285 "Demonstrates probabilistic set with deletion support.",
290 "Run section: basic, fp, vs, merge, cache, or 'all'",
291 false,
"all",
"section",
cmd);
297 cout <<
"============================================================\n";
298 cout <<
" ALEPH-W QUOTIENT FILTER EXAMPLE\n";
299 cout <<
"============================================================\n";
307 cout <<
"\n" << string(60,
'=') <<
"\n";
308 cout <<
"Quotient filter demo completed!\n";
309 cout << string(60,
'=') <<
"\n\n";
313 catch (TCLAP::ArgException & e)
315 cerr <<
"Error: " << e.error() <<
" for " << e.argId() <<
endl;
318 catch (exception & e)
320 cerr <<
"Error: " << e.what() <<
endl;
Quotient filter for probabilistic set membership with deletion.
Quotient_Filter & insert(const T &item)
Insert an element.
static Quotient_Filter from_capacity(size_t expected_n, double fp_rate, uint32_t seed=0x5F3759DF)
Construct from desired capacity and false positive rate.
void merge(const Quotient_Filter &other)
Merge another filter (same q, r, seed).
size_t size() const noexcept
Number of elements.
bool contains(const T &item) const noexcept
Test membership.
Main namespace for Aleph-w library functions.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
static void section(const string &title)
Quotient filter: a cache-friendly probabilistic set with deletion.
void print_section(const string &title)
void print_sub(const string &title)