Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
tpl_olhash.H
Go to the documentation of this file.
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
97# ifndef TPL_OLHASH_H
98# define TPL_OLHASH_H
99
100# include <iostream>
101# include <cstddef>
102# include <cstdint>
103# include <primes.H>
104# include <dlink.H>
105# include <ahDry.H>
106# include <hash-dry.H>
107# include <hashDry.H>
108# include <hash-fct.H>
109# include <ah-errors.H>
110# include <ah-concepts.H>
111
112using namespace Primes;
113
114using namespace Aleph;
115
116# ifdef N
117# define NBACKUP N
118# undef N
119# endif
120
121# ifdef M
122# define MBACKUP M
123# undef M
124# endif
125
126namespace Aleph
127{
128
161 template <typename Key, class Cmp = Aleph::equal_to<Key>>
164 : public OhashCommon<OLhashTable<Key, Cmp>, Key>,
165 public GenericTraverse<OLhashTable<Key, Cmp>>,
166 public LocateFunctions<OLhashTable<Key, Cmp>, Key>,
167 public FunctionalMethods<OLhashTable<Key, Cmp>, Key>,
168 public EqualToMethod<OLhashTable<Key, Cmp>>,
169 public StlAlephIterator<OLhashTable<Key, Cmp>>
170 {
171 friend class OhashCommon<OLhashTable<Key, Cmp>, Key>;
172
173 public:
174
175 using Key_Type = Key;
176
177 using Item_Type = Key;
178
179 using Hash_Fct = std::function<size_t(const Key &)>;
180
181 using Hash_Fct_Ptr = size_t (*)(const Key &);
182
187
188 struct Bucket
189 {
190 Key key;
191 char status = EMPTY;
192
195 };
196
197 [[nodiscard]] static Bucket *key_to_bucket(Key *rec) noexcept
198 {
199 // Note: cannot be constexpr due to reinterpret_cast
200 const auto base = reinterpret_cast<std::uintptr_t>(rec);
201 const auto offset = offsetof(Bucket, key);
202 return reinterpret_cast<Bucket *>(base - offset);
203 }
204
205 Bucket *table = nullptr;
206 size_t N = 0;
207
208 protected:
209
210 size_t len;
214
215 private:
216
219
220 [[nodiscard]] bool is_valid_bucket(Bucket *bucket) const noexcept
221 {
222 if (table == nullptr)
223 return false;
224
225 const auto begin = reinterpret_cast<std::uintptr_t>(&table[0]);
226 const auto end = reinterpret_cast<std::uintptr_t>(&table[len]);
227 const auto addr = reinterpret_cast<std::uintptr_t>(bucket);
228
230 return false;
231
232 const auto offset_with_base =
233 static_cast<std::ptrdiff_t>(addr - begin);
234
235 return offset_with_base % sizeof(*bucket) == 0;
236 }
237
238 public:
239
240 [[nodiscard]] constexpr const Cmp &get_compare() const noexcept { return cmp; }
241
242 [[nodiscard]] constexpr Cmp &get_compare() noexcept { return cmp; }
243
244 public:
245
249 const float l_alpha, const float u_alpha, const bool resize)
250 : table(nullptr), N(0), len(Primes::next_prime(l)),
253 {
254 table = new Bucket[len];
255 }
256
260
262
263 using Base::contains;
264
273
278
286
288
291 {
292 if (table != nullptr)
293 delete[] table;
294 }
295
296 void swap(OLhashTable &other) noexcept
297 {
298 std::swap(table, other.table);
299 std::swap(N, other.N);
300 std::swap(len, other.len);
301 std::swap(cmp, other.cmp);
302 std::swap(hash_fct, other.hash_fct);
303 std::swap(lower_alpha, other.lower_alpha);
304 std::swap(upper_alpha, other.upper_alpha);
305 std::swap(with_resize, other.with_resize);
306 }
307
314
316 {
317 swap(other);
318 }
319
321 {
322 if (this == &other)
323 return *this;
324
325 if (len > other.N)
326 this->clean_table();
327 else
328 {
329 auto *new_table = new Bucket[other.len];
330 delete[] table;
332 N = 0;
333 len = other.len;
334 hash_fct = other.hash_fct;
335 cmp = other.cmp;
336 lower_alpha = other.lower_alpha;
337 upper_alpha = other.upper_alpha;
338 }
339
340 this->copy_from_table(other);
341
342 return *this;
343 }
344
346 {
347 swap(other);
348 return *this;
349 }
350
353 [[nodiscard]] Key *search(const Key &key) const noexcept
354 {
355 size_t i = hash_fct(key) % len;
356 for (size_t c = 0; c < len; ++c)
357 {
358 // Prefetch next bucket for better cache performance
359 __builtin_prefetch(&table[(i + 1 < len) ? i + 1 : 0], 0, 1);
360
361 if (table[i].status == EMPTY) [[unlikely]]
362 return nullptr;
363
364 if (table[i].status == BUSY and cmp(table[i].key, key)) [[likely]]
365 return &table[i].key;
366
367 if (++i == len)
368 i = 0;
369 }
370
371 return nullptr; // Key not found
372 }
373
374 protected:
375
376 Bucket *allocate_bucket(const Key &key) noexcept
377 {
378 Bucket * first_deleted = nullptr;
379 size_t i = hash_fct(key) % len;
380
381 for (size_t c = 0; c < len; ++c)
382 {
383 // Prefetch next bucket for better cache performance
384 __builtin_prefetch(&table[(i + 1 < len) ? i + 1 : 0], 0, 1);
385
386 auto & b = table[i];
387 if (b.status == EMPTY) [[likely]]
388 {
389 Bucket *bucket = first_deleted != nullptr ? first_deleted : &b;
390 bucket->status = BUSY;
391 ++N;
392 return bucket;
393 }
394 if (b.status == BUSY)
395 {
396 if (cmp(key, b.key)) [[unlikely]]
397 return nullptr; // Duplicate
398 }
399 else // DELETED
400 {
401 if (first_deleted == nullptr)
402 first_deleted = &b;
403 }
404
405 if (++i == len)
406 i = 0;
407 }
408
409 if (first_deleted != nullptr) [[unlikely]]
410 {
412 ++N;
413 return first_deleted;
414 }
415
416 return nullptr;
417 }
418
419 // Allocate a new bucket for storing key. It always returns a
420 // pointer to the bucket entry into the table. If the key is
421 // already inserted in the table, then second value is true,
422 // otherwise, not allocation is done, and false is returned as second
423 std::tuple<Bucket *, bool> hard_allocate_bucket(const Key &key) noexcept
424 {
425 Bucket * first_deleted = nullptr;
426 size_t i = hash_fct(key) % len;
427
428 for (size_t c = 0; c < len; ++c)
429 {
430 // Prefetch next bucket for better cache performance
431 __builtin_prefetch(&table[(i + 1 < len) ? i + 1 : 0], 0, 1);
432
433 auto & b = table[i];
434 if (b.status == EMPTY) [[likely]]
435 {
436 Bucket *bucket = first_deleted != nullptr ? first_deleted : &b;
437 bucket->status = BUSY;
438 ++N;
439 return {bucket, false};
440 }
441 else if (b.status == BUSY)
442 {
443 if (cmp(key, b.key))
444 return {&b, true}; // Found existing
445 }
446 else // DELETED
447 {
448 if (first_deleted == nullptr)
449 first_deleted = &b;
450 }
451
452 if (++i == len)
453 i = 0;
454 }
455
456 if (first_deleted != nullptr) [[unlikely]]
457 {
459 ++N;
460 return {first_deleted, false};
461 }
462
463 return {nullptr, false};
464 }
465
467 [[nodiscard]] size_t prev_index(const size_t i) const noexcept
468 {
469 return (i == 0) ? len - 1 : i - 1;
470 }
471
473 [[nodiscard]] size_t next_index(const size_t i) const noexcept
474 {
475 return (i + 1 == len) ? 0 : i + 1;
476 }
477
483 void cleanup_deleted_chain(size_t idx) noexcept
484 {
485 // If next bucket is not EMPTY, we must keep this as DELETED
486 if (table[next_index(idx)].status != EMPTY)
487 return;
488
489 // Next is EMPTY, so we can mark this as EMPTY
490 table[idx].status = EMPTY;
491
492 // Propagate backwards: any DELETED bucket whose next is now EMPTY
493 // can also become EMPTY
494 size_t prev = prev_index(idx);
495 size_t count = 0;
496 while (table[prev].status == DELETED and count < len)
497 {
498 table[prev].status = EMPTY;
499 prev = prev_index(prev);
500 ++count;
501 }
502 }
503
510 {
512 << "record address is not inside table's range";
513
514 ah_domain_error_if(bucket->status != BUSY)
515 << "Bucket containing record is not busy";
516
517 --N;
518 const auto idx = static_cast<size_t>(bucket - &table[0]);
519 table[idx].status = DELETED;
521 }
522
523 public:
524
529 void remove(const Key &key)
530 {
531 size_t i = hash_fct(key) % len;
532 for (size_t c = 0; c < len; ++c)
533 {
534 // Prefetch next bucket for better cache performance
535 __builtin_prefetch(&table[(i + 1 < len) ? i + 1 : 0], 0, 1);
536
537 if (table[i].status == EMPTY) [[unlikely]]
538 ah_domain_error() << "Key not in hash table";
539
540 if (table[i].status == BUSY and cmp(table[i].key, key)) [[likely]]
541 {
542 table[i].status = DELETED;
543 --N;
545 return;
546 }
547
548 if (++i == len)
549 i = 0;
550 }
551
552 ah_domain_error() << "Key not in hash table";
553 }
554
556
558
560 {
561 DynArray<size_t> lens;
562 size_t num_busy = 0;
563 size_t num_deleted = 0;
564 size_t num_empty = 0;
565 size_t max_len = std::numeric_limits<size_t>::min();
566 for (size_t i = 0; i < len; ++i)
567 switch (table[i].status)
568 {
569 case BUSY:
570 {
571 ++num_busy;
572 const Key &key = table[i].key;
573 size_t i = hash_fct(key) % len;
574 size_t count = 1;
575
576 while (true)
577 {
578 if (table[i].status == BUSY and cmp(table[i].key, key))
579 break;
580 ++count;
581 if (++i == len)
582 i = 0;
583 }
584
585 max_len = std::max(max_len, count);
586 update_stat_len(lens, count);
587 break;
588 }
589 case EMPTY:
590 ++num_empty;
591 update_stat_len(lens, 0);
592 break;
593 case DELETED:
594 ++num_deleted;
595 break;
596 }
597
598 float avg = 0;
599 float sum = 0;
600 for (size_t i = 0; i < lens.size(); ++i)
601 {
602 avg += lens(i) * i;
603 sum += lens(i);
604 }
605
606 avg /= sum;
607 float var = 0;
608 for (size_t i = 0; i < lens.size(); ++i)
609 {
610 const float s = i - avg;
611 var += lens(i) * s * s;
612 }
613 var /= sum;
614
615 Stats stats;
616 stats.num_busy = num_busy;
617 stats.num_deleted = num_deleted;
618 stats.num_empty = num_empty;
619 std::swap(lens, stats.lens);
620 stats.avg = avg;
621 stats.var = var;
622 stats.max_len = max_len;
623
624 return stats;
625 }
626 };
627
628 template <typename Key, class Cmp = Aleph::equal_to<Key>>
630
631}
632# endif // TPL_OLHASH_H
C++20 concepts for constraining comparison functors.
Exception handling system with formatted messages for Aleph-w.
#define ah_domain_error()
Throws std::domain_error unconditionally.
Definition ah-errors.H:554
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Definition ah-errors.H:522
#define ah_invalid_argument_if(C)
Throws std::invalid_argument if condition holds.
Definition ah-errors.H:639
DRY (Don't Repeat Yourself) utilities and macros.
#define Special_Ctors(Set_Type, Type)
Generates special constructors for containers.
Definition ahDry.H:113
size_t size() const noexcept
Return the current dimension of array.
Open addressing hash table with linear probing collision resolution.
Definition tpl_olhash.H:170
OLhashTable(size_t len, Hash_Fct hash_fct, Hash_Fct, Cmp cmp, float lower_alpha, float upper_alpha, bool with_resize)
Definition tpl_olhash.H:257
~OLhashTable()
Release all occupied memory.
Definition tpl_olhash.H:290
OLhashTable(size_t len, Hash_Fct_Ptr hash_fct, Hash_Fct_Ptr, Cmp cmp, float lower_alpha, float upper_alpha, bool with_resize)
Definition tpl_olhash.H:274
void deallocate_bucket(Bucket *bucket)
Removes the record stored in the bucket pointed to by bucket from the table.
Definition tpl_olhash.H:509
Key * search(const Key &key) const noexcept
Finds the key and returns the associated record if key is found inside the table; otherwise,...
Definition tpl_olhash.H:353
std::tuple< Bucket *, bool > hard_allocate_bucket(const Key &key) noexcept
Definition tpl_olhash.H:423
size_t next_index(const size_t i) const noexcept
Index of next bucket (handles wrap-around)
Definition tpl_olhash.H:473
std::function< size_t(const Key &)> Hash_Fct
Definition tpl_olhash.H:179
OLhashTable(OLhashTable &&other) noexcept
Definition tpl_olhash.H:315
OLhashTable(size_t len, Hash_Fct hash_fct, Hash_Fct_Ptr, Cmp cmp, float lower_alpha, float upper_alpha, bool with_resize)
Constructor with two hash functions for metaprogramming compatibility with ODhashTable type.
Definition tpl_olhash.H:282
void cleanup_deleted_chain(size_t idx) noexcept
Cleanup DELETED entries that are at the end of collision chains.
Definition tpl_olhash.H:483
static void update_stat_len(DynArray< size_t > &lens, size_t i)
Definition tpl_olhash.H:555
OLhashTable(const OLhashTable &other)
Definition tpl_olhash.H:308
bool is_valid_bucket(Bucket *bucket) const noexcept
Definition tpl_olhash.H:220
OLhashTable(size_t len=Primes::DefaultPrime, Hash_Fct_Ptr hash_fct=Aleph::dft_hash_ptr_fct< Key >, Cmp cmp=Cmp(), float lower_alpha=hash_default_lower_alpha, float upper_alpha=0.70f, bool with_resize=true)
Definition tpl_olhash.H:265
size_t(*)(const Key &) Hash_Fct_Ptr
Definition tpl_olhash.H:181
constexpr const Cmp & get_compare() const noexcept
Definition tpl_olhash.H:240
Bucket * allocate_bucket(const Key &key) noexcept
Definition tpl_olhash.H:376
constexpr Cmp & get_compare() noexcept
Definition tpl_olhash.H:242
void swap(OLhashTable &other) noexcept
Definition tpl_olhash.H:296
OLhashTable(const size_t l, Hash_Fct hash_f, Cmp cmp_f, const float l_alpha, const float u_alpha, const bool resize)
Instantiate a hash table with hash function hash_f and a dimension equal to the smallest prime >= l.
Definition tpl_olhash.H:248
OLhashTable & operator=(OLhashTable &&other) noexcept
Definition tpl_olhash.H:345
size_t prev_index(const size_t i) const noexcept
Index of previous bucket (handles wrap-around)
Definition tpl_olhash.H:467
static Bucket * key_to_bucket(Key *rec) noexcept
Definition tpl_olhash.H:197
OLhashTable & operator=(const OLhashTable &other)
Definition tpl_olhash.H:320
typename OhashCommon< OLhashTable< Key, Cmp >, Key >::Stats Stats
Definition tpl_olhash.H:557
Stats stats() const
Definition tpl_olhash.H:559
void remove(const Key &key)
Remove the key referenced by key.
Definition tpl_olhash.H:529
Equality test for containers.
Definition ah-dry.H:1826
Common methods to the Aleph-w containers.
Definition ah-dry.H:642
Common sequential searching methods on containers.
Definition ah-dry.H:196
LocateFunctions< Container, Type > * base() const
Definition ah-dry.H:204
CRTP mixin providing common operations for open addressing hash tables.
Definition hashDry.H:101
size_t resize(size_t new_size)
Resizes the hash table to a new capacity.
Definition hashDry.H:524
void clean_table()
Removes all entries from the table without deallocating storage.
Definition hashDry.H:166
void copy_from_table(const HashTbl &other)
Copies all entries from another hash table.
Definition hashDry.H:143
constexpr bool contains(const Key &key) const noexcept
Alias for has().
Definition hashDry.H:425
Mixin that adds STL begin()/end() and cbegin()/cend() to Aleph containers.
iterator end() noexcept
Return an STL-compatible end iterator.
iterator begin() noexcept
Return an STL-compatible iterator to the first element.
Equivalence relation constraint for equality comparators.
Definition ah-concepts.H:97
Common hash table utilities and base classes.
#define OHASH_COMMON(class_name)
Definition hash-dry.H:48
Common operations for open addressing hash tables (CRTP mixin).
const long double offset[]
Offset values indexed by symbol string length (bounded by MAX_OFFSET_INDEX)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
and
Check uniqueness with explicit hash + equality functors.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
const float hash_default_lower_alpha
Definition hash-dry.C:38
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
T sum(const Container &container, const T &init=T{})
Compute sum of all elements.
const unsigned long DefaultPrime
Default prime number used when no specific size is requested.
Definition primes.C:381
size_t next_prime(unsigned long n)
Find the smallest prime number >= n from the database.
Definition primes.C:383
Prime number utilities for hash tables and mathematical operations.
Generic traversal of the container through its iterator.
Definition ah-dry.H:67
DynList< int > l