Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
tpl_olhash.H
Go to the documentation of this file.
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
97# ifndef TPL_OLHASH_H
98# define TPL_OLHASH_H
99
100# include <iostream>
101# include <cstddef>
102# include <cstdint>
103# include <primes.H>
104# include <dlink.H>
105# include <ahDry.H>
106# include <hash-dry.H>
107# include <hashDry.H>
108# include <hash-fct.H>
109# include <ah-errors.H>
110
111using namespace Primes;
112
113using namespace Aleph;
114
115# ifdef N
116# define NBACKUP N
117# undef N
118# endif
119
120# ifdef M
121# define MBACKUP M
122# undef M
123# endif
124
125namespace Aleph
126{
127
160 template <typename Key, class Cmp = Aleph::equal_to<Key>>
162 : public OhashCommon<OLhashTable<Key, Cmp>, Key>,
163 public GenericTraverse<OLhashTable<Key, Cmp>>,
164 public LocateFunctions<OLhashTable<Key, Cmp>, Key>,
165 public FunctionalMethods<OLhashTable<Key, Cmp>, Key>,
166 public EqualToMethod<OLhashTable<Key, Cmp>>,
167 public StlAlephIterator<OLhashTable<Key, Cmp>>
168 {
169 friend class OhashCommon<OLhashTable<Key, Cmp>, Key>;
170
171 public:
172
173 using Key_Type = Key;
174
175 using Item_Type = Key;
176
177 using Hash_Fct = std::function<size_t(const Key &)>;
178
179 using Hash_Fct_Ptr = size_t (*)(const Key &);
180
185
186 struct Bucket
187 {
188 Key key;
189 char status = EMPTY;
190
193 };
194
195 [[nodiscard]] static Bucket *key_to_bucket(Key *rec) noexcept
196 {
197 // Note: cannot be constexpr due to reinterpret_cast
198 const auto base = reinterpret_cast<std::uintptr_t>(rec);
199 const auto offset = offsetof(Bucket, key);
200 return reinterpret_cast<Bucket *>(base - offset);
201 }
202
203 Bucket *table = nullptr;
204 size_t N = 0;
205
206 protected:
207
208 size_t len;
211 Cmp cmp;
212
213 private:
214
217
218 [[nodiscard]] bool is_valid_bucket(Bucket *bucket) const noexcept
219 {
220 if (table == nullptr)
221 return false;
222
223 const auto begin = reinterpret_cast<std::uintptr_t>(&table[0]);
224 const auto end = reinterpret_cast<std::uintptr_t>(&table[len]);
225 const auto addr = reinterpret_cast<std::uintptr_t>(bucket);
226
228 return false;
229
230 const auto offset_with_base =
231 static_cast<std::ptrdiff_t>(addr - begin);
232
233 return offset_with_base % sizeof(*bucket) == 0;
234 }
235
236 public:
237
238 [[nodiscard]] constexpr const Cmp &get_compare() const noexcept { return cmp; }
239
240 [[nodiscard]] constexpr Cmp &get_compare() noexcept { return cmp; }
241
242 public:
243
246 OLhashTable(const size_t l, Hash_Fct hash_f, Cmp cmp_f,
247 const float l_alpha, const float u_alpha, const bool resize)
248 : table(nullptr), N(0), len(Primes::next_prime(l)),
251 {
252 table = new Bucket[len];
253 }
254
258
267
272
280
282
285 {
286 if (table != nullptr)
287 delete[] table;
288 }
289
290 void swap(OLhashTable &other) noexcept
291 {
292 std::swap(table, other.table);
293 std::swap(N, other.N);
294 std::swap(len, other.len);
295 std::swap(cmp, other.cmp);
296 std::swap(hash_fct, other.hash_fct);
297 std::swap(lower_alpha, other.lower_alpha);
298 std::swap(upper_alpha, other.upper_alpha);
299 std::swap(with_resize, other.with_resize);
300 }
301
308
310 {
311 swap(other);
312 }
313
315 {
316 if (this == &other)
317 return *this;
318
319 if (len > other.N)
320 this->clean_table();
321 else
322 {
323 auto *new_table = new Bucket[other.len];
324 delete[] table;
326 N = 0;
327 len = other.len;
328 hash_fct = other.hash_fct;
329 cmp = other.cmp;
330 lower_alpha = other.lower_alpha;
331 upper_alpha = other.upper_alpha;
332 }
333
334 this->copy_from_table(other);
335
336 return *this;
337 }
338
340 {
341 swap(other);
342 return *this;
343 }
344
347 [[nodiscard]] Key *search(const Key &key) const noexcept
348 {
349 size_t i = hash_fct(key) % len;
350 for (size_t c = 0; c < len; ++c)
351 {
352 // Prefetch next bucket for better cache performance
353 __builtin_prefetch(&table[(i + 1 < len) ? i + 1 : 0], 0, 1);
354
355 if (table[i].status == EMPTY) [[unlikely]]
356 return nullptr;
357
358 if (table[i].status == BUSY and cmp(table[i].key, key)) [[likely]]
359 return &table[i].key;
360
361 if (++i == len)
362 i = 0;
363 }
364
365 return nullptr; // Key not found
366 }
367
368 protected:
369
370 Bucket *allocate_bucket(const Key &key) noexcept
371 {
372 Bucket * first_deleted = nullptr;
373 size_t i = hash_fct(key) % len;
374
375 for (size_t c = 0; c < len; ++c)
376 {
377 // Prefetch next bucket for better cache performance
378 __builtin_prefetch(&table[(i + 1 < len) ? i + 1 : 0], 0, 1);
379
380 auto & b = table[i];
381 if (b.status == EMPTY) [[likely]]
382 {
383 Bucket *bucket = first_deleted != nullptr ? first_deleted : &b;
384 bucket->status = BUSY;
385 ++N;
386 return bucket;
387 }
388 if (b.status == BUSY)
389 {
390 if (cmp(key, b.key)) [[unlikely]]
391 return nullptr; // Duplicate
392 }
393 else // DELETED
394 {
395 if (first_deleted == nullptr)
396 first_deleted = &b;
397 }
398
399 if (++i == len)
400 i = 0;
401 }
402
403 if (first_deleted != nullptr) [[unlikely]]
404 {
406 ++N;
407 return first_deleted;
408 }
409
410 return nullptr;
411 }
412
413 // Allocate a new bucket for storing key. It always returns a
414 // pointer to the bucket entry into the table. If the key is
415 // already inserted in the table, then second value is true,
416 // otherwise, not allocation is done, and false is returned as second
417 std::tuple<Bucket *, bool> hard_allocate_bucket(const Key &key) noexcept
418 {
419 Bucket * first_deleted = nullptr;
420 size_t i = hash_fct(key) % len;
421
422 for (size_t c = 0; c < len; ++c)
423 {
424 // Prefetch next bucket for better cache performance
425 __builtin_prefetch(&table[(i + 1 < len) ? i + 1 : 0], 0, 1);
426
427 auto & b = table[i];
428 if (b.status == EMPTY) [[likely]]
429 {
430 Bucket *bucket = first_deleted != nullptr ? first_deleted : &b;
431 bucket->status = BUSY;
432 ++N;
433 return {bucket, false};
434 }
435 else if (b.status == BUSY)
436 {
437 if (cmp(key, b.key))
438 return {&b, true}; // Found existing
439 }
440 else // DELETED
441 {
442 if (first_deleted == nullptr)
443 first_deleted = &b;
444 }
445
446 if (++i == len)
447 i = 0;
448 }
449
450 if (first_deleted != nullptr) [[unlikely]]
451 {
453 ++N;
454 return {first_deleted, false};
455 }
456
457 return {nullptr, false};
458 }
459
461 size_t prev_index(const size_t i) const noexcept
462 {
463 return (i == 0) ? len - 1 : i - 1;
464 }
465
467 size_t next_index(const size_t i) const noexcept
468 {
469 return (i + 1 == len) ? 0 : i + 1;
470 }
471
477 void cleanup_deleted_chain(size_t idx) noexcept
478 {
479 // If next bucket is not EMPTY, we must keep this as DELETED
480 if (table[next_index(idx)].status != EMPTY)
481 return;
482
483 // Next is EMPTY, so we can mark this as EMPTY
484 table[idx].status = EMPTY;
485
486 // Propagate backwards: any DELETED bucket whose next is now EMPTY
487 // can also become EMPTY
488 size_t prev = prev_index(idx);
489 size_t count = 0;
490 while (table[prev].status == DELETED and count < len)
491 {
492 table[prev].status = EMPTY;
493 prev = prev_index(prev);
494 ++count;
495 }
496 }
497
504 {
506 << "record address is not inside table's range";
507
508 ah_domain_error_if(bucket->status != BUSY)
509 << "Bucket containing record is not busy";
510
511 --N;
512 const auto idx = static_cast<size_t>(bucket - &table[0]);
513 table[idx].status = DELETED;
515 }
516
517 public:
518
523 void remove(const Key &key)
524 {
525 size_t i = hash_fct(key) % len;
526 for (size_t c = 0; c < len; ++c)
527 {
528 // Prefetch next bucket for better cache performance
529 __builtin_prefetch(&table[(i + 1 < len) ? i + 1 : 0], 0, 1);
530
531 if (table[i].status == EMPTY) [[unlikely]]
532 ah_domain_error() << "Key not in hash table";
533
534 if (table[i].status == BUSY and cmp(table[i].key, key)) [[likely]]
535 {
536 table[i].status = DELETED;
537 --N;
539 return;
540 }
541
542 if (++i == len)
543 i = 0;
544 }
545
546 ah_domain_error() << "Key not in hash table";
547 }
548
550
552
554 {
555 DynArray<size_t> lens;
556 size_t num_busy = 0;
557 size_t num_deleted = 0;
558 size_t num_empty = 0;
559 size_t max_len = std::numeric_limits<size_t>::min();
560 for (size_t i = 0; i < len; ++i)
561 switch (table[i].status)
562 {
563 case BUSY:
564 {
565 ++num_busy;
566 const Key &key = table[i].key;
567 size_t i = hash_fct(key) % len;
568 size_t count = 1;
569
570 while (true)
571 {
572 if (table[i].status == BUSY and cmp(table[i].key, key))
573 break;
574 ++count;
575 if (++i == len)
576 i = 0;
577 }
578
579 max_len = std::max(max_len, count);
580 update_stat_len(lens, count);
581 break;
582 }
583 case EMPTY:
584 ++num_empty;
585 update_stat_len(lens, 0);
586 break;
587 case DELETED:
588 ++num_deleted;
589 break;
590 }
591
592 float avg = 0;
593 float sum = 0;
594 for (size_t i = 0; i < lens.size(); ++i)
595 {
596 avg += lens(i) * i;
597 sum += lens(i);
598 }
599
600 avg /= sum;
601 float var = 0;
602 for (size_t i = 0; i < lens.size(); ++i)
603 {
604 const float s = i - avg;
605 var += lens(i) * s * s;
606 }
607 var /= sum;
608
609 Stats stats;
610 stats.num_busy = num_busy;
611 stats.num_deleted = num_deleted;
612 stats.num_empty = num_empty;
613 std::swap(lens, stats.lens);
614 stats.avg = avg;
615 stats.var = var;
616 stats.max_len = max_len;
617
618 return stats;
619 }
620 };
621
622 template <typename Key, class Cmp = Aleph::equal_to<Key>>
624
625}
626# endif // TPL_OLHASH_H
Exception handling system with formatted messages for Aleph-w.
#define ah_domain_error()
Throws std::domain_error unconditionally.
Definition ah-errors.H:554
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Definition ah-errors.H:522
#define ah_invalid_argument_if(C)
Throws std::invalid_argument if condition holds.
Definition ah-errors.H:639
DRY (Don't Repeat Yourself) utilities and macros.
#define Special_Ctors(Set_Type, Type)
Generates special constructors for containers.
Definition ahDry.H:113
size_t size() const noexcept
Return the current dimension of array.
Open addressing hash table with linear probing collision resolution.
Definition tpl_olhash.H:168
OLhashTable(size_t len, Hash_Fct hash_fct, Hash_Fct, Cmp cmp, float lower_alpha, float upper_alpha, bool with_resize)
Definition tpl_olhash.H:255
~OLhashTable()
Release all occupied memory.
Definition tpl_olhash.H:284
OLhashTable(size_t len, Hash_Fct_Ptr hash_fct, Hash_Fct_Ptr, Cmp cmp, float lower_alpha, float upper_alpha, bool with_resize)
Definition tpl_olhash.H:268
void deallocate_bucket(Bucket *bucket)
Removes the record pointed to by record from the table.
Definition tpl_olhash.H:503
OLhashTable(size_t len=Primes::DefaultPrime, Hash_Fct_Ptr hash_fct=Aleph::dft_hash_fct< Key >, Cmp cmp=Cmp(), float lower_alpha=hash_default_lower_alpha, float upper_alpha=0.70f, bool with_resize=true)
Definition tpl_olhash.H:259
Key * search(const Key &key) const noexcept
Finds the key and returns the associated record if key is find inside the table; otherwise,...
Definition tpl_olhash.H:347
std::tuple< Bucket *, bool > hard_allocate_bucket(const Key &key) noexcept
Definition tpl_olhash.H:417
size_t next_index(const size_t i) const noexcept
Index of next bucket (handles wrap-around)
Definition tpl_olhash.H:467
std::function< size_t(const Key &)> Hash_Fct
Definition tpl_olhash.H:177
OLhashTable(OLhashTable &&other) noexcept
Definition tpl_olhash.H:309
OLhashTable(size_t len, Hash_Fct hash_fct, Hash_Fct_Ptr, Cmp cmp, float lower_alpha, float upper_alpha, bool with_resize)
Constructor with two hash functions for metaprogramming compatibility with ODhashTable type.
Definition tpl_olhash.H:276
void cleanup_deleted_chain(size_t idx) noexcept
Cleanup DELETED entries that are at the end of collision chains.
Definition tpl_olhash.H:477
static void update_stat_len(DynArray< size_t > &lens, size_t i)
Definition tpl_olhash.H:549
OLhashTable(const OLhashTable &other)
Definition tpl_olhash.H:302
bool is_valid_bucket(Bucket *bucket) const noexcept
Definition tpl_olhash.H:218
size_t(*)(const Key &) Hash_Fct_Ptr
Definition tpl_olhash.H:179
constexpr const Cmp & get_compare() const noexcept
Definition tpl_olhash.H:238
Bucket * allocate_bucket(const Key &key) noexcept
Definition tpl_olhash.H:370
constexpr Cmp & get_compare() noexcept
Definition tpl_olhash.H:240
void swap(OLhashTable &other) noexcept
Definition tpl_olhash.H:290
OLhashTable(const size_t l, Hash_Fct hash_f, Cmp cmp_f, const float l_alpha, const float u_alpha, const bool resize)
Instantiate a hash table with hash function __hash_fct and dimension len.
Definition tpl_olhash.H:246
OLhashTable & operator=(OLhashTable &&other) noexcept
Definition tpl_olhash.H:339
size_t prev_index(const size_t i) const noexcept
Index of previous bucket (handles wrap-around)
Definition tpl_olhash.H:461
static Bucket * key_to_bucket(Key *rec) noexcept
Definition tpl_olhash.H:195
OLhashTable & operator=(const OLhashTable &other)
Definition tpl_olhash.H:314
typename OhashCommon< OLhashTable< Key, Cmp >, Key >::Stats Stats
Definition tpl_olhash.H:551
Stats stats() const
Definition tpl_olhash.H:553
void remove(const Key &key)
Remove the key referenced by key.
Definition tpl_olhash.H:523
Equality test for containers.
Definition ah-dry.H:1534
Common methods to the Aleph-w ( ) containers.
Definition ah-dry.H:548
Aleph::DynList< __T > maps(Operation &op) const
Map the elements of the container.
Definition ah-dry.H:904
Common sequential searching methods on containers.
Definition ah-dry.H:164
LocateFunctions< Container, Type > * base() const
Definition ah-dry.H:172
CRTP mixin providing common operations for open addressing hash tables.
Definition hashDry.H:101
size_t resize(size_t new_size)
Resizes the hash table to a new capacity.
Definition hashDry.H:524
void clean_table()
Removes all entries from the table without deallocating storage.
Definition hashDry.H:166
void copy_from_table(const HashTbl &other)
Copies all entries from another hash table.
Definition hashDry.H:143
Mixin that adds STL begin()/end() and cbegin()/cend() to Aleph containers.
iterator end() noexcept
Return an STL-compatible end iterator.
iterator begin() noexcept
Return an STL-compatible iterator to the first element.
Common hash table utilities and base classes.
#define OHASH_COMMON(class_name)
Definition hash-dry.H:48
Collection of general-purpose hash functions.
Common operations for open addressing hash tables (CRTP mixin).
const long double offset[]
Offset values indexed by symbol string length (bounded by MAX_OFFSET_INDEX)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
const float hash_default_lower_alpha
Definition hash-dry.C:38
DynList< T > maps(const C &c, Op op)
Classic map operation.
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
T sum(const Container &container, const T &init=T{})
Compute sum of all elements.
const unsigned long DefaultPrime
Default prime number used when no specific size is requested.
Definition primes.C:381
size_t next_prime(unsigned long n)
Find the smallest prime number >= n from the database.
Definition primes.C:383
Prime number utilities for hash tables and mathematical operations.
Generic traversal of the container through its iterator.
Definition ah-dry.H:65
DynList< int > l