Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
compact-quotient-filter.cc
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
36#include <gtest/gtest.h>
37
38#include <quotient-filter.H>
40
41#include <cstddef>
42#include <string>
43#include <set>
44#include <random>
45#include <vector>
46#include <algorithm>
47
48using namespace Aleph;
49using namespace std;
50
51// =====================================================================
52// Construction
53// =====================================================================
54
56{
58 EXPECT_EQ(qf.q(), 4);
59 EXPECT_EQ(qf.r(), 8);
60 EXPECT_EQ(qf.capacity(), 16u);
61 EXPECT_EQ(qf.size(), 0u);
62 EXPECT_TRUE(qf.is_empty());
63}
64
70
76
81
83{
85 EXPECT_GE(qf.capacity(), 1000u);
86 EXPECT_LE(qf.false_positive_rate(), 0.02);
87}
88
95
97{
98 Compact_Quotient_Filter<int> qf(10, 8); // 1024 slots, 11 bits/slot
99 size_t expected_bytes = (1024 * 11 + 7) / 8; // 1408 bytes
100 EXPECT_EQ(qf.memory_usage(), expected_bytes);
101
102 // Compare to standard: would be 1024 * 8 = 8192 bytes
103 // Savings: ~82%
104 EXPECT_LT(qf.memory_usage(), 1024u * 2);
105}
106
107// =====================================================================
108// Basic insert / contains
109// =====================================================================
110
112{
114
115 for (int i = 0; i < 100; ++i)
116 qf.insert(i);
117
118 EXPECT_EQ(qf.size(), 100u);
119
120 for (int i = 0; i < 100; ++i)
121 EXPECT_TRUE(qf.contains(i)) << "False negative for i=" << i;
122}
123
125{
127
128 for (int i = 0; i < 100; ++i)
129 qf.insert(i);
130
131 size_t false_positives = 0;
132 for (int i = 1000; i < 2000; ++i)
133 if (qf.contains(i))
135
136 // With r=10, FP rate ≈ 1/1024 ≈ 0.1%.
137 // Testing 1000 elements: expect < 10 false positives.
138 EXPECT_LT(false_positives, 20u) << "Too many false positives: " << false_positives;
139}
140
142{
144
145 qf.insert(42);
146 EXPECT_EQ(qf.size(), 1u);
147
148 qf.insert(42);
149 EXPECT_EQ(qf.size(), 1u);
150
151 EXPECT_TRUE(qf.contains(42));
152}
153
155{
157
158 qf.insert("hello");
159 qf.insert("world");
160 qf.insert("aleph");
161
162 EXPECT_TRUE(qf.contains("hello"));
163 EXPECT_TRUE(qf.contains("world"));
164 EXPECT_TRUE(qf.contains("aleph"));
165
166 EXPECT_EQ(qf.size(), 3u);
167}
168
170{
171 // Test with very small r (high FP rate but minimal memory)
172 Compact_Quotient_Filter<int> qf(8, 4); // 256 slots, 7 bits/slot
173
174 for (int i = 0; i < 50; ++i)
175 qf.insert(i);
176
177 for (int i = 0; i < 50; ++i)
178 EXPECT_TRUE(qf.contains(i));
179
180 // Memory should be tiny: 256 * 7 / 8 = 224 bytes
181 EXPECT_EQ(qf.memory_usage(), 224u);
182}
183
185{
186 // Test with large r (low FP rate)
187 Compact_Quotient_Filter<int> qf(8, 20); // 256 slots, 23 bits/slot
188
189 for (int i = 0; i < 50; ++i)
190 qf.insert(i);
191
192 for (int i = 0; i < 50; ++i)
193 EXPECT_TRUE(qf.contains(i));
194
195 // FP rate should be ~2^-20 ≈ 0.0001%
196 EXPECT_LT(qf.false_positive_rate(), 0.000002);
197}
198
199// =====================================================================
200// Deletion
201// =====================================================================
202
204{
206
207 qf.insert(10);
208 qf.insert(20);
209 qf.insert(30);
210 EXPECT_EQ(qf.size(), 3u);
211
212 EXPECT_TRUE(qf.remove(20));
213 EXPECT_EQ(qf.size(), 2u);
214 EXPECT_FALSE(qf.contains(20));
215 EXPECT_TRUE(qf.contains(10));
216 EXPECT_TRUE(qf.contains(30));
217}
218
220{
222
223 qf.insert(1);
224 EXPECT_FALSE(qf.remove(999));
225 EXPECT_EQ(qf.size(), 1u);
226 EXPECT_TRUE(qf.contains(1));
227}
228
230{
232
233 for (int i = 0; i < 50; ++i)
234 qf.insert(i);
235
236 for (int i = 0; i < 50; ++i)
237 EXPECT_TRUE(qf.remove(i)) << "Failed to remove i=" << i;
238
239 EXPECT_EQ(qf.size(), 0u);
240 EXPECT_TRUE(qf.is_empty());
241}
242
244{
246
247 qf.insert(5);
248 qf.remove(5);
249 EXPECT_FALSE(qf.contains(5));
250
251 qf.insert(5);
252 EXPECT_TRUE(qf.contains(5));
253 EXPECT_EQ(qf.size(), 1u);
254}
255
257{
259
260 for (int i = 0; i < 100; ++i)
261 qf.insert(i);
262
263 // Remove every other element
264 for (int i = 0; i < 100; i += 2)
265 EXPECT_TRUE(qf.remove(i));
266
267 EXPECT_EQ(qf.size(), 50u);
268
269 // Verify odd elements remain
270 for (int i = 1; i < 100; i += 2)
271 EXPECT_TRUE(qf.contains(i)) << "Lost i=" << i;
272
273 // Verify even elements removed
274 for (int i = 0; i < 100; i += 2)
275 EXPECT_FALSE(qf.contains(i)) << "Still has i=" << i;
276}
277
278// =====================================================================
279// Capacity and overflow
280// =====================================================================
281
283{
284 Compact_Quotient_Filter<int> qf(4, 8); // 16 slots
285 EXPECT_DOUBLE_EQ(qf.load_factor(), 0.0);
286
287 for (int i = 0; i < 8; ++i)
288 qf.insert(i);
289
290 EXPECT_DOUBLE_EQ(qf.load_factor(), 0.5);
291}
292
294{
295 Compact_Quotient_Filter<int> qf(2, 8); // 4 slots
296
297 for (int i = 0; i < 4; ++i)
298 qf.insert(i * 100);
299
300 bool threw = false;
301 for (int i = 100; i < 200; ++i)
302 {
303 try
304 {
305 qf.insert(i);
306 }
307 catch (const std::overflow_error &)
308 {
309 threw = true;
310 break;
311 }
312 }
314}
315
317{
318 Compact_Quotient_Filter<int> qf(8, 8); // 256 slots
319
320 // Fill to 90% capacity
321 for (int i = 0; i < 230; ++i)
322 qf.insert(i);
323
324 EXPECT_GT(qf.load_factor(), 0.85);
325
326 // All elements should still be found
327 for (int i = 0; i < 230; ++i)
328 EXPECT_TRUE(qf.contains(i)) << "Lost i=" << i;
329}
330
331// =====================================================================
332// Introspection
333// =====================================================================
334
336{
338 size_t expected = (1024 * 11 + 7) / 8; // 1024 slots * 11 bits/slot
339 EXPECT_EQ(qf.memory_usage(), expected);
340}
341
343{
345 double expected = 1.0 / 256.0; // 2^{-8}
346 EXPECT_DOUBLE_EQ(qf.false_positive_rate(), expected);
347}
348
356
357// =====================================================================
358// Copy / Move semantics
359// =====================================================================
360
362{
364 for (int i = 0; i < 30; ++i)
365 qf.insert(i);
366
368 EXPECT_EQ(copy.size(), qf.size());
369 EXPECT_EQ(copy.q(), qf.q());
370 EXPECT_EQ(copy.r(), qf.r());
371
372 for (int i = 0; i < 30; ++i)
373 EXPECT_TRUE(copy.contains(i));
374}
375
377{
379 for (int i = 0; i < 30; ++i)
380 qf.insert(i);
381
382 size_t orig_size = qf.size();
384
385 EXPECT_EQ(moved.size(), orig_size);
386 for (int i = 0; i < 30; ++i)
387 EXPECT_TRUE(moved.contains(i));
388}
389
391{
394
395 a.insert(1);
396 b.insert(2);
397
398 a.swap(b);
399 EXPECT_TRUE(a.contains(2));
400 EXPECT_TRUE(b.contains(1));
401}
402
403// =====================================================================
404// Clear
405// =====================================================================
406
408{
410 for (int i = 0; i < 50; ++i)
411 qf.insert(i);
412
413 qf.clear();
414 EXPECT_EQ(qf.size(), 0u);
415 EXPECT_TRUE(qf.is_empty());
416
417 for (int i = 0; i < 50; ++i)
418 EXPECT_FALSE(qf.contains(i));
419}
420
422{
424
425 for (int i = 0; i < 20; ++i)
426 qf.insert(i);
427
428 qf.clear();
429
430 for (int i = 100; i < 120; ++i)
431 qf.insert(i);
432
433 EXPECT_EQ(qf.size(), 20u);
434 for (int i = 100; i < 120; ++i)
435 EXPECT_TRUE(qf.contains(i));
436}
437
438// =====================================================================
439// Merge
440// =====================================================================
441
443{
444 Compact_Quotient_Filter<int> a(10, 8, 42);
445 Compact_Quotient_Filter<int> b(10, 8, 42);
446
447 for (int i = 0; i < 50; ++i)
448 a.insert(i);
449 for (int i = 50; i < 100; ++i)
450 b.insert(i);
451
452 a.merge(b);
453
454 for (int i = 0; i < 100; ++i)
455 EXPECT_TRUE(a.contains(i)) << "Missing i=" << i;
456}
457
459{
460 Compact_Quotient_Filter<int> a(10, 8, 42);
461 Compact_Quotient_Filter<int> b(10, 9, 42); // different r
462
463 EXPECT_THROW(a.merge(b), std::domain_error);
464}
465
467{
468 Compact_Quotient_Filter<int> a(10, 8, 42);
469 Compact_Quotient_Filter<int> b(10, 8, 99); // different seed
470
471 EXPECT_THROW(a.merge(b), std::domain_error);
472}
473
475{
476 Compact_Quotient_Filter<int> a(10, 8, 42);
477 Compact_Quotient_Filter<int> b(10, 8, 42);
478
479 for (int i = 0; i < 60; ++i)
480 a.insert(i);
481 for (int i = 40; i < 100; ++i)
482 b.insert(i);
483
484 a.merge(b);
485
486 // Should have union: [0, 100)
487 for (int i = 0; i < 100; ++i)
488 EXPECT_TRUE(a.contains(i)) << "Missing i=" << i;
489}
490
491// =====================================================================
492// False positive rate (statistical)
493// =====================================================================
494
496{
497 const size_t n = 500;
498 Compact_Quotient_Filter<int> qf(12, 8); // 4096 slots, FP ≈ 1/256
499
500 for (size_t i = 0; i < n; ++i)
501 qf.insert(static_cast<int>(i));
502
503 size_t test_count = 10000;
504 size_t fps = 0;
505 for (size_t i = n + 10000; i < n + 10000 + test_count; ++i)
506 if (qf.contains(static_cast<int>(i)))
507 ++fps;
508
509 double empirical = static_cast<double>(fps) / test_count;
510 double theoretical = qf.false_positive_rate();
511
512 // Allow 5x margin over theoretical for statistical variance.
513 EXPECT_LT(empirical, theoretical * 5.0 + 0.01)
514 << "Empirical FP rate " << empirical << " exceeds 5x theoretical " << theoretical;
515}
516
518{
519 Compact_Quotient_Filter<int> qf(10, 16); // r=16 → FP ≈ 1/65536
520
521 for (int i = 0; i < 200; ++i)
522 qf.insert(i);
523
524 size_t fps = 0;
525 for (int i = 10000; i < 20000; ++i)
526 if (qf.contains(i))
527 ++fps;
528
529 // With 10000 tests and FP rate ~0.0015%, expect < 5 false positives
530 EXPECT_LT(fps, 10u);
531}
532
533// =====================================================================
534// Stress tests
535// =====================================================================
536
538{
539 Compact_Quotient_Filter<int> qf(14, 10); // 16K slots
540 std::mt19937 rng(42);
541 std::uniform_int_distribution<int> dist(0, 1000000);
542
543 std::set<int> inserted;
544
545 // Insert 5000 random elements
546 for (int i = 0; i < 5000; ++i)
547 {
548 int val = dist(rng);
549 qf.insert(val);
550 inserted.insert(val);
551 }
552
553 // All inserted elements must be found
554 for (int val : inserted)
555 EXPECT_TRUE(qf.contains(val)) << "Lost value " << val;
556}
557
559{
560 Compact_Quotient_Filter<int> qf(10, 8); // 1024 slots
561
562 for (int cycle = 0; cycle < 5; ++cycle)
563 {
564 qf.clear(); // Start fresh each cycle
565
566 // Insert 200 elements
567 for (int i = 0; i < 200; ++i)
568 qf.insert(cycle * 1000 + i);
569
570 // Remove half
571 for (int i = 0; i < 100; ++i)
572 qf.remove(cycle * 1000 + i);
573
574 // Verify remaining half
575 for (int i = 100; i < 200; ++i)
576 EXPECT_TRUE(qf.contains(cycle * 1000 + i)) << "Cycle " << cycle << ", lost i=" << i;
577 }
578}
579
581{
582 // Small q forces many collisions
583 Compact_Quotient_Filter<int> qf(6, 12); // 64 slots, 15 bits/slot
584
585 for (int i = 0; i < 40; ++i)
586 qf.insert(i);
587
588 for (int i = 0; i < 40; ++i)
589 EXPECT_TRUE(qf.contains(i)) << "Lost i=" << i;
590
591 // Remove some
592 for (int i = 0; i < 20; ++i)
593 EXPECT_TRUE(qf.remove(i));
594
595 // Verify remaining
596 for (int i = 20; i < 40; ++i)
597 EXPECT_TRUE(qf.contains(i));
598}
599
600// =====================================================================
601// Edge cases
602// =====================================================================
603
605{
606 Compact_Quotient_Filter<int> qf(1, 8); // 2 slots
607
608 qf.insert(1);
609 EXPECT_TRUE(qf.contains(1));
610
611 qf.insert(2);
612 EXPECT_TRUE(qf.contains(1));
613 EXPECT_TRUE(qf.contains(2));
614}
615
617{
618 Compact_Quotient_Filter<int> qf(8, 1); // r=1 → 50% FP rate
619
620 for (int i = 0; i < 50; ++i)
621 qf.insert(i);
622
623 for (int i = 0; i < 50; ++i)
624 EXPECT_TRUE(qf.contains(i));
625}
626
628{
629 Compact_Quotient_Filter<int> qf(4, 60); // r=60 → extremely low FP
630
631 qf.insert(42);
632 EXPECT_TRUE(qf.contains(42));
633 EXPECT_FALSE(qf.contains(43));
634}
635
637{
638 Compact_Quotient_Filter<int> qf(4, 8); // 16 slots
639
640 // Force wrap-around by filling near the end
641 for (int i = 0; i < 16; ++i)
642 qf.insert(i);
643
644 for (int i = 0; i < 16; ++i)
645 EXPECT_TRUE(qf.contains(i)) << "Lost i=" << i;
646}
647
648// =====================================================================
649// Comparison with standard Quotient_Filter
650// =====================================================================
651
653{
654 const uint32_t seed = 12345;
655 Quotient_Filter<int> standard(10, 8, seed);
657
658 std::vector<int> values = {1, 5, 10, 42, 99, 123, 456, 789};
659
660 for (int val : values)
661 {
662 standard.insert(val);
663 compact.insert(val);
664 }
665
666 // Both should find all inserted elements
667 for (int val : values)
668 {
669 EXPECT_TRUE(standard.contains(val));
670 EXPECT_TRUE(compact.contains(val));
671 }
672
673 // Both should have same size
674 EXPECT_EQ(standard.size(), compact.size());
675}
676
678{
679 Quotient_Filter<int> standard(10, 8);
681
682 size_t standard_mem = standard.memory_usage();
683 size_t compact_mem = compact.memory_usage();
684
685 // Compact should use ~82% less memory
686 EXPECT_LT(compact_mem, standard_mem / 5); // < 20% of standard
687
688 // Standard: 1024 slots * 8 bytes = 8192 bytes
689 EXPECT_EQ(standard_mem, 8192u);
690
691 // Compact: 1024 slots * 11 bits = 11264 bits = 1408 bytes
692 EXPECT_EQ(compact_mem, 1408u);
693}
Memory-optimized quotient filter with bit-packed slots.
void swap(Compact_Quotient_Filter &other) noexcept
Swap in O(1).
static std::pair< uint8_t, uint8_t > estimate(size_t n, double fp_rate)
Estimate (q, r) for n elements and target FP rate.
static Compact_Quotient_Filter from_capacity(size_t expected_n, double fp_rate, uint32_t seed=0x5F3759DF)
Construct from desired capacity and false positive rate.
Compact_Quotient_Filter & insert(const T &item)
Insert an element.
void merge(const Compact_Quotient_Filter &other)
Merge another filter (same q, r, seed).
bool contains(const T &item) const noexcept
Test membership.
Quotient filter for probabilistic set membership with deletion.
Quotient_Filter & insert(const T &item)
Insert an element.
size_t memory_usage() const noexcept
Memory usage in bytes.
size_t size() const noexcept
Number of elements.
bool contains(const T &item) const noexcept
Test membership.
Memory-optimized quotient filter with bit-packed slots.
#define TEST(name)
static mt19937 rng
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
Itor2 copy(Itor1 sourceBeg, const Itor1 &sourceEnd, Itor2 destBeg)
Copy elements from one range to another.
Definition ahAlgo.H:584
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
STL namespace.
Quotient filter: a cache-friendly probabilistic set with deletion.
ValueArg< size_t > seed
Definition testHash.C:48