Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
quotient_filter_example.C
Go to the documentation of this file.
1
30#include <iostream>
31#include <iomanip>
32#include <string>
33#include <vector>
34#include <cmath>
35
36#include <tclap/CmdLine.h>
37
38#include <quotient-filter.H>
39
40using namespace std;
41using namespace Aleph;
42
43void print_section(const string & title)
44{
45 cout << "\n" << string(60, '=') << "\n";
46 cout << " " << title << "\n";
47 cout << string(60, '=') << "\n\n";
48}
49
50void print_sub(const string & title)
51{
52 cout << "\n--- " << title << " ---\n";
53}
54
55// =====================================================================
56// 1. Basic usage
57// =====================================================================
58
60{
61 print_section("BASIC QUOTIENT FILTER USAGE");
62
63 cout << "A quotient filter tests set membership probabilistically.\n";
64 cout << "Like Bloom filters, but also supports DELETION.\n\n";
65
66 // q=10 → 1024 slots, r=8 → FP ≈ 1/256 ≈ 0.4%
68
69 cout << "Created quotient filter:\n";
70 cout << " Slots (2^q): " << qf.capacity() << "\n";
71 cout << " Remainder bits (r): " << static_cast<int>(qf.r()) << "\n";
72 cout << " FP rate ≈ 2^{-r}: " << fixed << setprecision(4)
73 << (qf.false_positive_rate() * 100) << "%\n";
74 cout << " Memory: " << qf.memory_usage() << " bytes\n\n";
75
76 print_sub("Inserting elements");
77 vector<string> words = {"cafe", "arepa", "empanada", "bandeja", "sancocho"};
78 for (const auto & w : words)
79 {
80 qf.insert(w);
81 cout << " Inserted: \"" << w << "\"\n";
82 }
83 cout << " Size: " << qf.size() << "\n";
84
85 print_sub("Membership queries");
86 vector<string> tests = {"cafe", "pizza", "arepa", "hamburguesa", "sancocho"};
87 for (const auto & w : tests)
88 {
89 bool found = qf.contains(w);
90 cout << " \"" << w << "\": "
91 << (found ? "PROBABLY present" : "DEFINITELY absent") << "\n";
92 }
93
94 print_sub("Deletion (Bloom filters can't do this!)");
95 cout << " Removing \"arepa\"...\n";
96 qf.remove("arepa");
97 cout << " contains(\"arepa\"): "
98 << (qf.contains("arepa") ? "true" : "false") << "\n";
99 cout << " Size after removal: " << qf.size() << "\n";
100
101 cout << "\n Removing \"sancocho\"...\n";
102 qf.remove("sancocho");
103 cout << " contains(\"sancocho\"): "
104 << (qf.contains("sancocho") ? "true" : "false") << "\n";
105
106 cout << "\n Remaining elements still present:\n";
107 for (const auto & w : words)
108 if (qf.contains(w))
109 cout << " \"" << w << "\"\n";
110}
111
112// =====================================================================
113// 2. False positive rate
114// =====================================================================
115
117{
118 print_section("FALSE POSITIVE RATE ANALYSIS");
119
120 cout << "FP rate ≈ 2^{-r} where r is the remainder bits.\n\n";
121
122 size_t n = 500;
123
124 cout << setw(5) << "r" << setw(12) << "Theor FP%"
125 << setw(15) << "Observed FP%" << setw(12) << "Tested\n";
126 cout << string(44, '-') << "\n";
127
128 for (int r_val : {4, 6, 8, 10, 12})
129 {
130 Quotient_Filter<int> qf(12, static_cast<uint8_t>(r_val));
131
132 for (size_t i = 0; i < n; ++i)
133 qf.insert(static_cast<int>(i));
134
135 size_t test_count = 10000;
136 size_t fps = 0;
137 for (size_t i = n + 50000; i < n + 50000 + test_count; ++i)
138 if (qf.contains(static_cast<int>(i)))
139 ++fps;
140
141 double theor = 100.0 * qf.false_positive_rate();
142 double empir = 100.0 * static_cast<double>(fps) / test_count;
143
144 cout << setw(5) << r_val
145 << setw(12) << fixed << setprecision(4) << theor
146 << setw(15) << empir
147 << setw(12) << test_count << "\n";
148 }
149}
150
151// =====================================================================
152// 3. Comparison with Bloom filter
153// =====================================================================
154
156{
157 print_section("QUOTIENT FILTER vs BLOOM FILTER");
158
159 cout << "Feature comparison:\n\n";
160
161 cout << setw(25) << "Feature"
162 << setw(20) << "Bloom Filter"
163 << setw(20) << "Quotient Filter" << "\n";
164 cout << string(65, '-') << "\n";
165
166 auto row = [](const string & feat, const string & bloom, const string & qf)
167 {
168 cout << setw(25) << feat
169 << setw(20) << bloom
170 << setw(20) << qf << "\n";
171 };
172
173 row("Deletion", "No", "Yes");
174 row("False negatives", "Never", "Never");
175 row("False positives", "Yes", "Yes");
176 row("Cache-friendly", "Poor", "Excellent");
177 row("Mergeable", "Yes (OR)", "Yes (union)");
178 row("Space per element", "~10 bits/1%", "(r+3) bits");
179 row("Resize", "Rebuild", "Rebuild");
180
181 cout << "\nQuotient filter with r=8: FP ≈ 0.4%, 11 bits/element.\n";
182 cout << "Bloom filter at 0.4%: ~11 bits/element, ~8 hash functions.\n";
183 cout << "→ Similar space, but QF supports deletion and is cache-friendly.\n";
184}
185
186// =====================================================================
187// 4. Merge
188// =====================================================================
189
191{
192 print_section("MERGING QUOTIENT FILTERS");
193
194 cout << "Two filters with the same (q, r, seed) can be merged.\n\n";
195
196 uint32_t seed = 12345;
197 Quotient_Filter<int> a(10, 8, seed);
198 Quotient_Filter<int> b(10, 8, seed);
199
200 for (int i = 0; i < 100; ++i)
201 a.insert(i);
202 for (int i = 100; i < 200; ++i)
203 b.insert(i);
204
205 cout << "Filter A: " << a.size() << " elements (0..99)\n";
206 cout << "Filter B: " << b.size() << " elements (100..199)\n";
207
208 a.merge(b);
209 cout << "\nAfter merge: " << a.size() << " elements\n";
210
211 size_t found = 0;
212 for (int i = 0; i < 200; ++i)
213 if (a.contains(i))
214 ++found;
215
216 cout << "Elements 0..199 found: " << found << "/200\n";
217}
218
219// =====================================================================
220// 5. Practical: network dedup cache
221// =====================================================================
222
224{
225 print_section("PRACTICAL: Network Packet Deduplication");
226
227 cout << "Use a quotient filter as a dedup cache for packet IDs.\n";
228 cout << "Old entries can be deleted (unlike Bloom filters).\n\n";
229
230 auto qf = Quotient_Filter<int>::from_capacity(10000, 0.01);
231
232 cout << "Filter for ~10000 packets at 1% FP:\n";
233 cout << " q=" << static_cast<int>(qf.q())
234 << " r=" << static_cast<int>(qf.r())
235 << " capacity=" << qf.capacity()
236 << " memory=" << qf.memory_usage() / 1024 << " KB\n\n";
237
238 // Simulate a stream of packet IDs
239 int window_start = 0;
240 int window_size = 5000;
241
242 // Insert first window
243 for (int id = window_start; id < window_start + window_size; ++id)
244 qf.insert(id);
245 cout << "Inserted window [0, 5000): size=" << qf.size()
246 << " load=" << fixed << setprecision(2)
247 << (qf.load_factor() * 100) << "%\n";
248
249 // Slide window: remove old, add new
250 int new_start = 2000;
251 for (int id = window_start; id < new_start; ++id)
252 qf.remove(id);
253 for (int id = window_size; id < window_size + new_start; ++id)
254 qf.insert(id);
255
256 cout << "Slid window to [2000, 7000): size=" << qf.size()
257 << " load=" << fixed << setprecision(2)
258 << (qf.load_factor() * 100) << "%\n";
259
260 // Check dedup
261 size_t dupes_caught = 0;
262 size_t dupes_missed = 0;
263 for (int id = new_start; id < new_start + 500; ++id)
264 {
265 if (qf.contains(id))
266 ++dupes_caught;
267 else
268 ++dupes_missed;
269 }
270 cout << "\nDedup check for 500 IDs in window:\n";
271 cout << " Caught as duplicates: " << dupes_caught << "/500\n";
272 cout << " Missed (false neg): " << dupes_missed << "/500\n";
273}
274
275// =====================================================================
276// Main
277// =====================================================================
278
279int main(int argc, char * argv[])
280{
281 try
282 {
283 TCLAP::CmdLine cmd(
284 "Quotient filter example for Aleph-w.\n"
285 "Demonstrates probabilistic set with deletion support.",
286 ' ', "1.0");
287
288 TCLAP::ValueArg<string> sectionArg(
289 "s", "section",
290 "Run section: basic, fp, vs, merge, cache, or 'all'",
291 false, "all", "section", cmd);
292
293 cmd.parse(argc, argv);
294 string section = sectionArg.getValue();
295
296 cout << "\n";
297 cout << "============================================================\n";
298 cout << " ALEPH-W QUOTIENT FILTER EXAMPLE\n";
299 cout << "============================================================\n";
300
301 if (section == "all" or section == "basic") demo_basic();
302 if (section == "all" or section == "fp") demo_fp();
303 if (section == "all" or section == "vs") demo_comparison();
304 if (section == "all" or section == "merge") demo_merge();
305 if (section == "all" or section == "cache") demo_cache();
306
307 cout << "\n" << string(60, '=') << "\n";
308 cout << "Quotient filter demo completed!\n";
309 cout << string(60, '=') << "\n\n";
310
311 return 0;
312 }
313 catch (TCLAP::ArgException & e)
314 {
315 cerr << "Error: " << e.error() << " for " << e.argId() << endl;
316 return 1;
317 }
318 catch (exception & e)
319 {
320 cerr << "Error: " << e.what() << endl;
321 return 1;
322 }
323}
long double w
Definition btreepic.C:153
Quotient filter for probabilistic set membership with deletion.
Quotient_Filter & insert(const T &item)
Insert an element.
static Quotient_Filter from_capacity(size_t expected_n, double fp_rate, uint32_t seed=0x5F3759DF)
Construct from desired capacity and false positive rate.
void merge(const Quotient_Filter &other)
Merge another filter (same q, r, seed).
size_t size() const noexcept
Number of elements.
bool contains(const T &item) const noexcept
Test membership.
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
STL namespace.
static void section(const string &title)
Quotient filter: a cache-friendly probabilistic set with deletion.
void demo_cache()
void demo_fp()
void print_section(const string &title)
void demo_merge()
void demo_comparison()
void demo_basic()
void print_sub(const string &title)
void tests()
Definition test-dry.C:245
CmdLine cmd
Definition testHash.C:43
ValueArg< size_t > seed
Definition testHash.C:48