Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
cuckoo-filter.cc
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9*/
10
11#include <gtest/gtest.h>
12#include <cuckoo-filter.H>
13#include <string>
14#include <vector>
15#include <set>
16#include <random>
17
18using namespace Aleph;
19
20// 1. Pruebas de Operaciones Básicas
23
24 EXPECT_TRUE(filter.insert("apple"));
25 EXPECT_TRUE(filter.insert("banana"));
26 EXPECT_TRUE(filter.insert("cherry"));
27
28 EXPECT_TRUE(filter.contains("apple"));
29 EXPECT_TRUE(filter.contains("banana"));
30 EXPECT_TRUE(filter.contains("cherry"));
31
32 // A cuckoo filter is probabilistic: negative lookups may produce false
33 // positives. Instead of asserting an exact negative, verify the false-
34 // positive rate over a fixed negative corpus stays within the expected bound.
35 const std::vector<std::string> negatives = {
36 "date", "elderberry", "fig", "grape", "honeydew",
37 "kiwi", "lemon", "mango", "nectarine", "orange"
38 };
39 size_t fp_count = 0;
40 for (const auto & s : negatives)
41 if (filter.contains(s)) ++fp_count;
42 // With a 12-bit fingerprint and only 3 items the FP rate is well under 10%,
43 // so at most 1 false positive in 10 negative queries is an acceptable budget.
44 EXPECT_LE(fp_count, 1u) << "Too many false positives: " << fp_count << "/10";
45
46 EXPECT_EQ(filter.size(), 3);
47}
48
49// 2. Pruebas de Eliminación
52
53 for (int i = 0; i < 100; ++i) {
54 filter.insert(i);
55 }
56
57 EXPECT_EQ(filter.size(), 100);
58
59 for (int i = 0; i < 50; ++i) {
60 EXPECT_TRUE(filter.remove(i));
61 }
62
63 EXPECT_EQ(filter.size(), 50);
64
65 // After deletion, lookups for removed keys may still return true (false
66 // positive — inherent to probabilistic filters). Budget: allow at most
67 // 3% of the 50 removed keys to register as false positives.
68 size_t fp_after_del = 0;
69 for (int i = 0; i < 50; ++i)
70 if (filter.contains(i)) ++fp_after_del;
72 << "Too many false positives after deletion: " << fp_after_del << "/50";
73
74 for (int i = 50; i < 100; ++i) {
75 EXPECT_TRUE(filter.contains(i));
76 }
77}
78
79// 3. Prueba de Duplicados
82
83 EXPECT_TRUE(filter.insert("same"));
84 EXPECT_TRUE(filter.insert("same"));
85
86 EXPECT_TRUE(filter.contains("same"));
87 EXPECT_EQ(filter.size(), 2);
88
89 EXPECT_TRUE(filter.remove("same"));
90 EXPECT_TRUE(filter.contains("same"));
91 EXPECT_EQ(filter.size(), 1);
92}
93
94// 4. Prueba de Filtro Lleno (Saturación)
97
98 bool full = false;
99 for (int i = 0; i < 100; ++i) {
100 if (!filter.insert(i)) {
101 full = true;
102 break;
103 }
104 }
105 EXPECT_TRUE(full);
106}
107
108// 5. Prueba con Diferentes Tipos de Datos
111 EXPECT_TRUE(dbl_filter.insert(3.14159));
112 EXPECT_TRUE(dbl_filter.contains(3.14159));
113 // Negative check is probabilistic; use a small corpus and cap FP budget.
114 const std::vector<double> neg_dbl = { 2.71828, 1.41421, 1.61803, 0.57722, 1.73205 };
115 size_t fp_dbl = 0;
116 for (double v : neg_dbl)
117 if (dbl_filter.contains(v)) ++fp_dbl;
118 EXPECT_LE(fp_dbl, 1u) << "Too many false positives for double filter: " << fp_dbl;
119}
120
121// 6. Prueba de Configuraciones de Plantilla
124 for (size_t i = 0; i < 500; ++i) EXPECT_TRUE(filter.insert(i));
125 for (size_t i = 0; i < 500; ++i) EXPECT_TRUE(filter.contains(i));
126}
127
128// 7. Prueba de Estrés
130 const size_t num_ops = 1000; // Reducido para evitar saturación excesiva
131 Cuckoo_Filter<int, 16, 4> filter(num_ops * 2, 42u); // fixed seed for determinism
132 std::vector<int> inserted_list;
133
134 std::mt19937 rng(42);
135 std::uniform_int_distribution<int> dist(0, 1000000);
136
137 for (size_t i = 0; i < num_ops; ++i) {
138 int val = dist(rng);
139 if (filter.insert(val)) {
140 inserted_list.push_back(val);
141 }
142 }
143
144 // Todos los que reportaron éxito deben estar
145 for (int val : inserted_list) {
146 EXPECT_TRUE(filter.contains(val)) << "Value " << val << " not found";
147 }
148
149 // Eliminación selectiva
150 size_t initial_size = inserted_list.size();
151 size_t removed_count = 0;
152 for (size_t i = 0; i < initial_size / 2; ++i) {
153 int val = inserted_list.back();
154 inserted_list.pop_back();
155 if (filter.remove(val)) {
157 }
158 }
159
161
162 // Los restantes deben seguir estando
163 for (int val : inserted_list) {
164 EXPECT_TRUE(filter.contains(val)) << "Remaining value " << val << " lost";
165 }
166}
167
168// 8. Alta Carga con Capacidad Alineada
170 // 512 buckets * 4 slots = 2048 capacity
171 const size_t capacity = 1800;
173
174 // Record which keys were actually accepted — some may fail if the filter
175 // reaches capacity, so we cannot assume insert(i) succeeded for all i < N.
176 std::vector<size_t> inserted_keys;
177 inserted_keys.reserve(capacity);
178 for (size_t i = 0; i < capacity; ++i)
179 if (filter.insert(i))
180 inserted_keys.push_back(i);
181
182 const double lf = filter.load_factor();
183 EXPECT_GT(lf, 0.70);
184
185 for (const size_t key : inserted_keys)
186 ASSERT_TRUE(filter.contains(key)) << "Item " << key << " lost at LF " << lf;
187}
188
191 filter.insert(1);
192 filter.insert(2);
193 filter.clear();
194
195 EXPECT_EQ(filter.size(), 0);
196 EXPECT_FALSE(filter.contains(1));
197 EXPECT_FALSE(filter.contains(2));
198}
Industrial-grade Cuckoo Filter implementation.
Probabilistic set membership with Cuckoo filters.
#define TEST(name)
static mt19937 rng
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
Container2< typename Container1::Item_Type > filter(Container1 &container, Operation &operation)
Filter elements that satisfy operation.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.