Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
quotient-filter.cc
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
36#include <gtest/gtest.h>
37
38#include <quotient-filter.H>
39
40#include <cstddef>
41#include <string>
42#include <set>
43#include <random>
44
45using namespace Aleph;
46using namespace std;
47
48// =====================================================================
49// Construction
50// =====================================================================
51
53{
55 EXPECT_EQ(qf.q(), 4);
56 EXPECT_EQ(qf.r(), 8);
57 EXPECT_EQ(qf.capacity(), 16u);
58 EXPECT_EQ(qf.size(), 0u);
59 EXPECT_TRUE(qf.is_empty());
60}
61
63{
64 EXPECT_THROW((Quotient_Filter<int>(0, 8)), std::domain_error);
65 EXPECT_THROW((Quotient_Filter<int>(33, 8)), std::domain_error);
66}
67
69{
70 EXPECT_THROW((Quotient_Filter<int>(4, 0)), std::domain_error);
71 EXPECT_THROW((Quotient_Filter<int>(4, 61)), std::domain_error);
72}
73
78
80{
81 auto qf = Quotient_Filter<int>::from_capacity(1000, 0.01);
82 EXPECT_GE(qf.capacity(), 1000u);
83 EXPECT_LE(qf.false_positive_rate(), 0.02);
84}
85
95
96// =====================================================================
97// Basic insert / contains
98// =====================================================================
99
101{
103
104 for (int i = 0; i < 100; ++i)
105 qf.insert(i);
106
107 EXPECT_EQ(qf.size(), 100u);
108
109 for (int i = 0; i < 100; ++i)
110 EXPECT_TRUE(qf.contains(i)) << "False negative for i=" << i;
111}
112
114{
115 Quotient_Filter<int> qf(12, 10);
116
117 for (int i = 0; i < 100; ++i)
118 qf.insert(i);
119
120 size_t false_positives = 0;
121 for (int i = 1000; i < 2000; ++i)
122 if (qf.contains(i))
124
125 // With r=10, FP rate ≈ 1/1024 ≈ 0.1%.
126 // Testing 1000 elements: expect < 10 false positives.
128 << "Too many false positives: " << false_positives;
129}
130
132{
134
135 qf.insert(42);
136 EXPECT_EQ(qf.size(), 1u);
137
138 qf.insert(42);
139 EXPECT_EQ(qf.size(), 1u);
140
141 EXPECT_TRUE(qf.contains(42));
142}
143
145{
147
148 qf.insert("hello");
149 qf.insert("world");
150 qf.insert("aleph");
151
152 EXPECT_TRUE(qf.contains("hello"));
153 EXPECT_TRUE(qf.contains("world"));
154 EXPECT_TRUE(qf.contains("aleph"));
155
156 EXPECT_EQ(qf.size(), 3u);
157}
158
159// =====================================================================
160// Deletion
161// =====================================================================
162
164{
166
167 qf.insert(10);
168 qf.insert(20);
169 qf.insert(30);
170 EXPECT_EQ(qf.size(), 3u);
171
172 EXPECT_TRUE(qf.remove(20));
173 EXPECT_EQ(qf.size(), 2u);
174 EXPECT_FALSE(qf.contains(20));
175 EXPECT_TRUE(qf.contains(10));
176 EXPECT_TRUE(qf.contains(30));
177}
178
180{
182
183 qf.insert(1);
184 EXPECT_FALSE(qf.remove(999));
185 EXPECT_EQ(qf.size(), 1u);
186 EXPECT_TRUE(qf.contains(1));
187}
188
190{
192
193 for (int i = 0; i < 50; ++i)
194 qf.insert(i);
195
196 for (int i = 0; i < 50; ++i)
197 EXPECT_TRUE(qf.remove(i)) << "Failed to remove i=" << i;
198
199 EXPECT_EQ(qf.size(), 0u);
200 EXPECT_TRUE(qf.is_empty());
201}
202
204{
206
207 qf.insert(5);
208 qf.remove(5);
209 EXPECT_FALSE(qf.contains(5));
210
211 qf.insert(5);
212 EXPECT_TRUE(qf.contains(5));
213 EXPECT_EQ(qf.size(), 1u);
214}
215
216// =====================================================================
217// Capacity and overflow
218// =====================================================================
219
221{
222 Quotient_Filter<int> qf(4, 8); // 16 slots
223 EXPECT_DOUBLE_EQ(qf.load_factor(), 0.0);
224
225 for (int i = 0; i < 8; ++i)
226 qf.insert(i);
227
228 EXPECT_DOUBLE_EQ(qf.load_factor(), 0.5);
229}
230
232{
233 Quotient_Filter<int> qf(2, 8); // 4 slots
234
235 for (int i = 0; i < 4; ++i)
236 qf.insert(i * 100); // large spread to avoid collisions
237
238 // Filter is full (or nearly). Inserting more should throw.
239 // Some inserts might succeed due to fingerprint collisions (duplicates),
240 // so we try enough to guarantee overflow.
241 bool threw = false;
242 for (int i = 100; i < 200; ++i)
243 {
244 try { qf.insert(i); }
245 catch (const std::overflow_error &) { threw = true; break; }
246 }
248}
249
250// =====================================================================
251// Introspection
252// =====================================================================
253
255{
257 EXPECT_EQ(qf.memory_usage(), 1024u * sizeof(uint64_t));
258}
259
261{
263 double expected = 1.0 / 256.0; // 2^{-8}
264 EXPECT_DOUBLE_EQ(qf.false_positive_rate(), expected);
265}
266
268{
269 auto [q_val, r_val] = Quotient_Filter<int>::estimate(10000, 0.001);
270 EXPECT_GE(q_val, 1);
271 EXPECT_GE(r_val, 10); // -log2(0.001) ≈ 10
272 EXPECT_LE(q_val + r_val, 64);
273}
274
275// =====================================================================
276// Copy / Move semantics
277// =====================================================================
278
280{
282 for (int i = 0; i < 30; ++i)
283 qf.insert(i);
284
286 EXPECT_EQ(copy.size(), qf.size());
287 EXPECT_EQ(copy.q(), qf.q());
288 EXPECT_EQ(copy.r(), qf.r());
289
290 for (int i = 0; i < 30; ++i)
291 EXPECT_TRUE(copy.contains(i));
292}
293
295{
297 for (int i = 0; i < 30; ++i)
298 qf.insert(i);
299
300 size_t orig_size = qf.size();
301 Quotient_Filter<int> moved(std::move(qf));
302
303 EXPECT_EQ(moved.size(), orig_size);
304 for (int i = 0; i < 30; ++i)
305 EXPECT_TRUE(moved.contains(i));
306}
307
309{
310 Quotient_Filter<int> a(8, 8);
311 Quotient_Filter<int> b(8, 8);
312
313 a.insert(1);
314 b.insert(2);
315
316 a.swap(b);
317 EXPECT_TRUE(a.contains(2));
318 EXPECT_TRUE(b.contains(1));
319}
320
321// =====================================================================
322// Clear
323// =====================================================================
324
326{
328 for (int i = 0; i < 50; ++i)
329 qf.insert(i);
330
331 qf.clear();
332 EXPECT_EQ(qf.size(), 0u);
333 EXPECT_TRUE(qf.is_empty());
334
335 for (int i = 0; i < 50; ++i)
336 EXPECT_FALSE(qf.contains(i));
337}
338
339// =====================================================================
340// Merge
341// =====================================================================
342
344{
345 Quotient_Filter<int> a(10, 8, 42);
346 Quotient_Filter<int> b(10, 8, 42);
347
348 for (int i = 0; i < 50; ++i)
349 a.insert(i);
350 for (int i = 50; i < 100; ++i)
351 b.insert(i);
352
353 a.merge(b);
354
355 for (int i = 0; i < 100; ++i)
356 EXPECT_TRUE(a.contains(i)) << "Missing i=" << i;
357}
358
360{
361 Quotient_Filter<int> a(10, 8, 42);
362 Quotient_Filter<int> b(10, 9, 42); // different r
363
364 EXPECT_THROW(a.merge(b), std::domain_error);
365}
366
368{
369 Quotient_Filter<int> a(10, 8, 42);
370 Quotient_Filter<int> b(10, 8, 99); // different seed
371
372 EXPECT_THROW(a.merge(b), std::domain_error);
373}
374
375// =====================================================================
376// False positive rate (statistical)
377// =====================================================================
378
380{
381 const size_t n = 500;
382 Quotient_Filter<int> qf(12, 8); // 4096 slots, FP ≈ 1/256
383
384 for (size_t i = 0; i < n; ++i)
385 qf.insert(static_cast<int>(i));
386
387 size_t test_count = 10000;
388 size_t fps = 0;
389 for (size_t i = n + 10000; i < n + 10000 + test_count; ++i)
390 if (qf.contains(static_cast<int>(i)))
391 ++fps;
392
393 double empirical = static_cast<double>(fps) / test_count;
394 double theoretical = qf.false_positive_rate();
395
396 // Allow 5x margin over theoretical for statistical variance.
397 EXPECT_LT(empirical, theoretical * 5.0 + 0.01)
398 << "Empirical FP rate " << empirical
399 << " exceeds 5x theoretical " << theoretical;
400}
Quotient filter for probabilistic set membership with deletion.
Quotient_Filter & insert(const T &item)
Insert an element.
static Quotient_Filter from_capacity(size_t expected_n, double fp_rate, uint32_t seed=0x5F3759DF)
Construct from desired capacity and false positive rate.
void merge(const Quotient_Filter &other)
Merge another filter (same q, r, seed).
bool contains(const T &item) const noexcept
Test membership.
static std::pair< uint8_t, uint8_t > estimate(size_t n, double fp_rate)
Estimate (q, r) for n elements and target FP rate.
void swap(Quotient_Filter &other) noexcept
Swap in O(1).
#define TEST(name)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
Itor2 copy(Itor1 sourceBeg, const Itor1 &sourceEnd, Itor2 destBeg)
Copy elements from one range to another.
Definition ahAlgo.H:584
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
STL namespace.
Quotient filter: a cache-friendly probabilistic set with deletion.