Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
bloom-filter.cc
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
38#include <gtest/gtest.h>
39
40#include <bloom-filter.H>
41
42#include <algorithm>
43#include <cstddef>
44#include <string>
45#include <vector>
46
47using namespace Aleph;
48using namespace std;
49
50namespace {
51
52size_t identity_hash(const int &item, unsigned long) noexcept
53{
54 return static_cast<size_t>(item);
55}
56
57size_t salted_identity_hash(const int &item, unsigned long seed) noexcept
58{
59 return static_cast<size_t>(item) + static_cast<size_t>(seed);
60}
61
62template <typename T>
64{
66 DynList<T> tmp = list;
67 tmp.for_each([&out](auto &value) { out.push_back(value); });
68 return out;
69}
70
71} // namespace
72
74{
75 EXPECT_THROW((Bloom_Filter<int>(0, 1, identity_hash, 0)), std::invalid_argument);
76 EXPECT_THROW((Bloom_Filter<int>(8, 0, identity_hash, 0)), std::invalid_argument);
77 EXPECT_THROW((Bloom_Filter<int>(8, 1, nullptr, 0)), std::invalid_argument);
78}
79
81{
83
84 for (int i = 0; i < 50; ++i)
85 {
86 f.insert(i);
88 }
89}
90
92{
93 // Deterministic mapping: with k=1 and identity_hash ignoring seed, the bit
94 // index is item % m.
95 Bloom_Filter<int> f(128, 1, identity_hash, 123);
96
97 f.insert(1);
98 f.insert(2);
99 f.insert(3);
100
101 EXPECT_TRUE(f.contains(1));
102 EXPECT_TRUE(f.contains(2));
103 EXPECT_TRUE(f.contains(3));
104
105 // These map to distinct bits not set above.
106 EXPECT_FALSE(f.contains(10));
107 EXPECT_FALSE(f.contains(64));
108 EXPECT_FALSE(f.contains(127));
109}
110
112{
114
115 const auto seeds = to_vector(f.hash_seeds());
116 EXPECT_EQ(seeds.size(), 5u);
117
118 const auto hashes = to_vector(f.hashes(123));
119 EXPECT_EQ(hashes.size(), 5u);
120 for (const auto h : hashes)
121 EXPECT_LT(h, f.get_m());
122}
123
125{
126 Bloom_Filter<int> f(64, 1, identity_hash, 999);
127
128 f.insert(0);
129 f.insert(3);
130 f.insert(63);
131
132 const auto bits = to_vector(f.set_bits());
133 EXPECT_EQ(bits.size(), 3u);
134
135 EXPECT_NE(find(bits.begin(), bits.end(), 0u), bits.end());
136 EXPECT_NE(find(bits.begin(), bits.end(), 3u), bits.end());
137 EXPECT_NE(find(bits.begin(), bits.end(), 63u), bits.end());
138}
139
141{
143 for (int i = 0; i < 20; ++i)
144 f.insert(i);
145
147 EXPECT_EQ(copy.get_m(), f.get_m());
148 EXPECT_EQ(copy.get_k(), f.get_k());
149 EXPECT_EQ(copy.get_n(), f.get_n());
150 for (int i = 0; i < 20; ++i)
151 EXPECT_TRUE(copy.contains(i));
152
153 Bloom_Filter<int> moved(std::move(copy));
154 for (int i = 0; i < 20; ++i)
155 EXPECT_TRUE(moved.contains(i));
156}
157
159{
160 // Create a base filter and copy it to guarantee compatibility (same seeds).
161 Bloom_Filter<int> base(128, 2, salted_identity_hash, 101);
162 Bloom_Filter<int> a(base);
163 Bloom_Filter<int> b(base);
164
165 a.insert(1);
166 a.insert(2);
167 b.insert(2);
168 b.insert(3);
169
171 u |= b;
172 EXPECT_TRUE(u.contains(1));
173 EXPECT_TRUE(u.contains(2));
174 EXPECT_TRUE(u.contains(3));
175
177 inter &= b;
178 EXPECT_TRUE(inter.contains(2));
179
180 // With a Bloom filter, negative assertions are probabilistic; we only
181 // validate that the operation succeeds and keeps known positives.
182}
183
185{
187 Bloom_Filter<int> b(128, 2, salted_identity_hash, 2); // different internal seeds
188
189 try
190 {
191 a |= b;
192 FAIL() << "expected std::domain_error";
193 }
194 catch (const std::domain_error &e)
195 {
196 const string msg = e.what();
197 EXPECT_NE(msg.find("Bloom filter have different hashes"), string::npos);
198 }
199}
200
202{
203 Bloom_Filter<int> f(1024, 1, identity_hash, 0);
204 for (int i = 0; i < 50; ++i)
205 f.insert(i);
206
207 const auto x = f.get_x();
208 EXPECT_GE(x, 1u);
209 const auto est = f.expected_size(x);
210
211 // For sparse occupancy with k=1, this estimate should be close to x.
212 EXPECT_GE(est, 45u);
213 EXPECT_LE(est, 60u);
214}
Probabilistic set membership with Bloom filters.
long double h
Definition btreepic.C:154
Bloom filter implementation.
size_t expected_size(const size_t x) const noexcept
Estimate the number of inserted items from the number of bits set.
size_t get_n() const noexcept
Return the number of insertions performed on the filter.
DynList< size_t > set_bits() const
Return the indexes of bits set to 1.
bool contains(const T &item) const noexcept
Test membership.
size_t get_m() const noexcept
Return the number of bits of the filter.
DynList< size_t > hash_seeds() const
Return the internally generated per-hash seeds.
DynList< size_t > hashes(const T &item) const
Return the bit positions used by item.
size_t get_k() const noexcept
Return the number of hash functions used by the filter.
size_t get_x() const noexcept
Return the number of bits set to 1.
auto & insert(const T &item) noexcept
Insert an item.
Dynamic singly linked list with functional programming support.
Definition htlist.H:1423
void for_each(Operation &operation)
Traverse all the container and performs an operation on each element.
Definition ah-dry.H:685
#define FAIL(msg)
#define TEST(name)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
Itor2 copy(Itor1 sourceBeg, const Itor1 &sourceEnd, Itor2 destBeg)
Copy elements from one range to another.
Definition ahAlgo.H:584
Itor find(const Itor &beg, const Itor &end, const T &value)
Find the first element equal to a value.
Definition ahAlgo.H:230
std::vector< typename C::Item_Type > to_vector(const C &c)
Convert a container to a std::vector.
Definition ah-convert.H:228
DynList< T > maps(const C &c, Op op)
Classic map operation.
STL namespace.