Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
tpl_linHash.H
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
50# ifndef TPL_LINHASH_H
51# define TPL_LINHASH_H
52
53# include <iostream>
54# include <primes.H>
55# include <dlink.H>
56# include <tpl_dynArray.H>
57# include <tpl_dnode.H>
58# include <htlist.H>
59# include <hashDry.H>
60# include <hash-fct.H>
61# include <hash-dry.H>
62# include <ah-errors.H>
63
64# ifdef N
65# define NBACKUP N
66# undef N
67# endif
68
69# ifdef M
70# define MBACKUP M
71# undef M
72# endif
73
74
75using namespace Aleph;
76
77namespace Aleph {
78
79# define LINBUCKET_BODY(BUCKETNAME) \
80 \
81 Dlink link; \
82 \
83public: \
84 \
85 BUCKETNAME(const BUCKETNAME & bucket) \
86 : Dnode<Key>(bucket) {} \
87 \
88 BUCKETNAME() {} \
89 \
90 BUCKETNAME(const Key & key) \
91 : Dnode<Key>(key) {} \
92 \
93 Key & get_key() noexcept { return this->get_data(); } \
94 \
95 Dlink * get_link() noexcept { return &link; } \
96 \
97 DLINK_TO_BASE(BUCKETNAME, link);
98
99
107 template <typename Key>
108class LinHashBucket : public Dnode<Key>
109{
111};
112
120 template <typename Key>
121class LinHashBucketVtl : public Dnode<Key>
122{
124
126 virtual ~LinHashBucketVtl() {}
127};
128
129
154 template <typename Key, template <class> class BucketType,
155 class Cmp = Aleph::equal_to<Key>>
157 : public HashStats<GenLinearHashTable<Key, BucketType, Cmp>>
158{
159 friend class HashStats<GenLinearHashTable<Key, BucketType, Cmp>>;
160
161public:
162
165 using Hash_Fct = std::function<size_t(const Key &)>;
166 using Hash_Fct_Ptr = size_t (*) (const Key &);
167
170
171 using Key_Type = Key;
172
173 using Item_Type = Key;
174
175private:
176
178
180
181 static size_t multiply_by_two(size_t n) noexcept { return n << 1; }
182
183 static size_t divide_by_two(size_t n) noexcept { return n >> 1; }
184
187
188protected:
189
191 Cmp cmp;
192
193private:
194
195 size_t M; // Table size
196 size_t N; // Number of elements in the table
197 size_t busy_slots_counter; // Number of occupied entries
198 bool remove_all_buckets; // Indicates if destructor should free
199 // the buckets
200
201protected:
202
203 float upper_alpha; // upper load factor
204 float lower_alpha; // lower load factor
205
206private:
207
208 size_t p; // index of the list being partitioned (or expanded)
209 size_t l; // number of times the table has been doubled
210 size_t MP; // stores the value p + M
211 size_t MM; // product 2*M
212
213protected:
214
215 mutable size_t len;
216
217private:
218
219 size_t call_hash_fct(const Key & key) const
220 {
221 const auto hash = hash_fct(key);
222 const auto i = hash % M;
223 return i < p ? hash % MM : i;
224 }
225
226 void expand()
227 { // expand the table until load is below upper_alpha
228 for (float alpha = 1.0*N/MP; alpha >= upper_alpha; alpha = 1.0*N/MP)
229 {
230 BucketList * src_list_ptr = table.test(p);
231 if (src_list_ptr != nullptr) // is table[p] written?
232 if (not src_list_ptr->is_empty()) // is table[p] not empty?
233 {
234 BucketList * tgt_list_ptr = nullptr;
235
236 // traverse collision list and move buckets to table[p+M]
237 for (BucketItor it(*src_list_ptr); it.has_curr(); /* nothing */)
238 {
239 Bucket * bucket = static_cast<Bucket*>(it.get_curr());
240
241 it.next_ne(); // advance to the next element in the list
242
243 const Key & key = bucket->get_key();
244 const size_t i = hash_fct(key) % MM;
245 if (i == p) // does this key belong to table[p]?
246 continue; // yes ==> key stays in table[p] ==> next
247
248 if (tgt_list_ptr == nullptr)
249 tgt_list_ptr = &table.touch(MP);
250
251 // bucket does not belong to table[p] but to table[p+m] ==>
252 // remove bucket from table[i] and insert it in table[p+m]
253 bucket->del();
254 tgt_list_ptr->append(bucket);
255 }
256
257 if (src_list_ptr->is_empty()) // did table[p] become empty?
258 --busy_slots_counter; // yes ==> one empty slot
259
260 ++busy_slots_counter; // one new for table[p+M]
261 }
262 ++p;
263 ++MP;
264 if (p == M) // (p == 2*M) should the table size be doubled?
265 { // yes ==> change table size to 2*M
266 ++l; // Number of times the table has been doubled
267 p = 0;
268 MP = M = MM; // assign 2*M to them
270 }
271 }
272 }
273
275 { // contract the table until load is below lower_alpha
276 for (float alpha = (1.0*N)/MP; alpha <= lower_alpha and MP > len;
277 alpha = (1.0*N)/MP)
278 {
279 if (p == 0) // should the table size be halved?
280 { // yes ==> update table size to M/2
281 --l; // Number of times the table has been doubled decreases
282 MM = M; // divide by two
283 M = divide_by_two(M);
284 p = M - 1;
285 }
286 else
287 --p; // no ==> just reduce index p
288
289 --MP;
290 if (MP < table.size()) // Does table[MP] exist?
291 {
293 if (src_list_ptr != nullptr) // does entry for table[p+M] exist?
294 {
295 if (not src_list_ptr->is_empty()) // is table[p+M] empty?
296 { // no ==> merge the lists
297 BucketList & tgt_list = table.touch(p);// allocate table[p]
299 --busy_slots_counter; // table[p+M] became empty
300 }
301 table.cut_ne(MP); // eventually free memory of table[p+M]
302 }
303 }
304 }
305 }
306
307public:
308
310 void set_hash_fct(Hash_Fct fct) noexcept
311 {
312 hash_fct = fct;
313 }
314
316 {
318 }
319
321
322 Cmp & get_compare() noexcept { return cmp; }
323
324 const Cmp & get_compare() const noexcept { return cmp; }
325
327 float current_alpha() const noexcept { return 1.0*N/MP; }
328
329 protected:
330
332 float __lower_alpha, float __upper_alpha,
333 bool __remove_all_buckets, bool /* fake */)
334 : table(__len), hash_fct(__hash_fct), cmp(__cmp), M(__len), N(0),
337 p(0), l(0), MP(M), MM(multiply_by_two(M)), len(__len)
338 {
339 ah_length_error_if(M == 0) << "table's length is zero";
340
341 ah_length_error_if(MM > table.max_size()) << "table's length too big";
342
344 << "upper alpha must be greater than lower alpha";
345 }
346
347 public:
348
383
385 {
386 std::swap(table, other.table);
387 entries_list.swap(other.entries_list);
388 std::swap(hash_fct, other.hash_fct);
389 std::swap(cmp, other.cmp);
390 std::swap(M, other.M);
391 std::swap(N, other.N);
392 std::swap(busy_slots_counter, other.busy_slots_counter);
393 std::swap(remove_all_buckets, other.remove_all_buckets);
394 std::swap(upper_alpha, other.upper_alpha);
395 std::swap(lower_alpha, other.lower_alpha);
396 std::swap(p, other.p);
397 std::swap(l, other.l);
398 std::swap(MP, other.MP);
399 std::swap(MM, other.MM);
400 std::swap(len, other.len);
401 }
402
403
407 {
408 while (not entries_list.is_empty())
409 {
410 Bucket * bucket = Bucket::dlink_to_base(entries_list.remove_first_ne());
411 bucket->del(); // remove from the list in the array
412 bucket->get_link()->del(); // remove from entries list
413 delete bucket;
414 }
415
416 M = MP = len;
418 N = p = l = 0;
419 table.cut_ne(len);
420 }
421
423 {
425 empty();
426 }
427
428private:
429
430 Bucket *
431 search_in_bucket_list(BucketList * list, const Key & key) const
432 {
433 for (BucketItor it(*list); it.has_curr(); it.next_ne())
434 {
435 Bucket * bucket = static_cast<Bucket*>(it.get_curr());
436 if (cmp (key, bucket->get_key()))
437 return bucket;
438 }
439
440 return nullptr;
441 }
442
443public:
444
448 Bucket * search(const Key & key) const noexcept
449 {
450 auto i = call_hash_fct(key);
451 BucketList * list = table.test(i);
452 if (list == nullptr) // Has table[i] ever been written?
453 return nullptr; // No ==> element is not in the table
454
455 if (list->is_empty())
456 return nullptr;
457
458 return search_in_bucket_list(list, key);
459 }
460
462 const size_t & size() const noexcept { return N; }
463
465 bool is_empty() const noexcept { return N == 0; }
466
468 const size_t & capacity() const noexcept { return MP; }
469
471 const size_t & busy_slots() const noexcept { return busy_slots_counter; }
472
474 const size_t & expansions() const noexcept { return l; }
475
478 Bucket * insert(Bucket * bucket)
479 {
480 const size_t i = call_hash_fct(bucket->get_key());
481 BucketList & list = table.touch(i); // allocates memory for table[i]
482 if (list.is_empty())
484
485 if (search_in_bucket_list(&list, bucket->get_key()) != nullptr)
486 return nullptr; // duplicated key
487
488 list.append(bucket);
489 entries_list.append(bucket->get_link());
490 ++N;
491 expand();
492
493 return bucket;
494 }
495
497 {
498 const size_t i = call_hash_fct(bucket->get_key());
499 BucketList & list = table.touch(i); // allocates memory for table[i]
500 if (list.is_empty())
502
503 Bucket * p = search_in_bucket_list(&list, bucket->get_key());
504 if (p != nullptr)
505 return p; // duplicated key
506
507 list.append(bucket);
508 entries_list.append(bucket->get_link());
509 ++N;
510 expand();
511
512 return bucket;
513 }
514
516 size_t resize(size_t) noexcept { return MP; }
517
518private:
519
520 // This routine deletes bucket from hash table EXCEPT from
521 // entries_list. The end of this routine is to dry the deletion and to
522 // allow remove from other places; ofor instance, from the del()
523 // method of Iterator class
524 Bucket * remove_bucket(Bucket * bucket) noexcept
525 {
526 assert(bucket != nullptr);
527 assert(search(bucket->get_key()) == bucket);
528
529 Bucket * next = static_cast<Bucket*>(bucket->get_next());
530
531 bucket->del(); // remove from collision list
532
533 if (next->is_empty()) // collision list empty?
534 --busy_slots_counter; // yes ==> one empty slot
535
536 --N;
537 contract();
538
539 return bucket;
540 }
541
542public:
543
546 Bucket * remove(Bucket * bucket) noexcept
547 {
548 bucket->get_link()->del(); // remove from entries_list
549 return remove_bucket(bucket);
550 }
551
552 void print()
553 {
554 for (size_t i = 0; i < MP; ++i)
555 {
556 std::cout << "table[" << i << "] = [ ";
557
558 if (table.exist(i))
559 {
560 BucketList & list = table.access(i);
561
562 if (not list.is_empty())
563 for (BucketItor it(list); it.has_curr(); it.next_ne())
564 {
565 Bucket * bucket = static_cast<Bucket*>(it.get_curr());
566 const Key & key = bucket->get_key();
567 std::cout << key << ",";
568 }
569 }
570 std::cout << "]" << '\n';
571 }
572 }
573
575 {
576 private:
577
579 long pos = 0;
580
581 public:
582
585
587 using Item_Type = Bucket *;
588
591 : Dlink::Iterator(const_cast<Dlink &>(table.entries_list)),
592 hash_table(& const_cast<GenLinearHashTable &>(table))
593 {
594 // Empty
595 }
596
598 Iterator() noexcept { /* Empty */ }
599
601 {
602 return Bucket::dlink_to_base(this->Dlink::Iterator::get_curr_ne());
603 }
604
607 {
608 return Bucket::dlink_to_base(this->Dlink::Iterator::get_curr());
609 }
610
612 {
613 Bucket * bucket = Bucket::dlink_to_base(this->Dlink::Iterator::del());
614 return (Bucket *) hash_table->remove_bucket(bucket);
615 }
616
618 {
620 pos++;
621 }
622
623 void next()
624 {
625 this->Dlink::Iterator::next();
626 pos++;
627 }
628
629 void prev()
630 {
631 this->Dlink::Iterator::prev();
632 pos--;
633 }
634
635 long get_pos() const noexcept { return pos; }
636 };
637};
638
639
658template <typename Key, class Cmp = Aleph::equal_to<Key>>
660
661
680 template <typename Key, class Cmp = Aleph::equal_to<Key>>
682
683
684} // end namespace Aleph
685
686# ifdef NBACKUP
687# define N NBACKUP
688# undef NBACKUP
689# endif
690
691# ifdef MBACKUP
692# define M MBACKUP
693# undef MBACKUP
694# endif
695
696# endif // TPL_LINHASH_H
697
Exception handling system with formatted messages for Aleph-w.
#define ah_length_error_if(C)
Throws std::length_error if condition holds.
Definition ah-errors.H:698
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Definition ah-errors.H:522
Iterator on a list of Dnode objects.
Definition tpl_dnode.H:261
Node belonging to a double circular linked list with header node.
Definition tpl_dnode.H:106
T & get_key() noexcept
Definition tpl_dnode.H:241
T & append(const T &item)
Append a new item by copy.
Definition htlist.H:1562
Iterator() noexcept
Instantiate an empty iterator.
Bucket * get_curr()
Return the current bucket.
Bucket * Item_Type
The element type returned by get_curr().
Iterator(const GenLinearHashTable &table) noexcept
Instantiate an iterator over the hash table.
Generic linear hash table.
const size_t & capacity() const noexcept
Returns the table capacity.
void swap(GenLinearHashTable &other) noexcept
std::function< size_t(const Key &)> Hash_Fct
The hash function type.
void set_hash_fct(Hash_Fct_Ptr fct) noexcept
DynArray< BucketList > table
static size_t divide_by_two(size_t n) noexcept
Bucket * remove_bucket(Bucket *bucket) noexcept
Cmp & get_compare() noexcept
const size_t & size() const noexcept
Returns the number of elements in the table.
bool is_empty() const noexcept
return true is table is empty
GenLinearHashTable(size_t len=Primes::DefaultPrime, Hash_Fct_Ptr hash_fct=Aleph::dft_hash_fct< Key >, Cmp cmp=Cmp(), float lower_alpha=hash_default_lower_alpha, float upper_alpha=hash_default_upper_alpha, bool remove_all_buckets=true, bool with_resize=true)
Instantiate a generic linear hash table.
void empty() noexcept
Empty the entire table.
typename Dnode< Key >::Iterator BucketItor
static size_t multiply_by_two(size_t n) noexcept
const size_t & expansions() const noexcept
Returns the expansion level performed on the table.
size_t call_hash_fct(const Key &key) const
float current_alpha() const noexcept
return the current table load
Bucket * remove(Bucket *bucket) noexcept
Remove bucket from table.
BucketType< Key > Bucket
The bucket type.
const size_t & busy_slots() const noexcept
Returns the number of busy slots in the table.
Bucket * insert(Bucket *bucket)
Insert bucket in the table.
Hash_Fct get_hash_fct() const noexcept
size_t(*)(const Key &) Hash_Fct_Ptr
void set_hash_fct(Hash_Fct fct) noexcept
Set the internal hash function.
Bucket * search_or_insert(Bucket *bucket)
void contract() noexcept
const Cmp & get_compare() const noexcept
GenLinearHashTable(size_t __len, Hash_Fct __hash_fct, Cmp __cmp, float __lower_alpha, float __upper_alpha, bool __remove_all_buckets, bool)
Bucket * search(const Key &key) const noexcept
Search for key in the linear hash table.
size_t resize(size_t) noexcept
Provided for generic programming compatibility.
Bucket * search_in_bucket_list(BucketList *list, const Key &key) const
constexpr bool is_empty() const noexcept
Return true if list is empty.
Definition htlist.H:523
void concat_list(HTList &l) noexcept
Definition htlist.H:663
Bucket with virtual destructor for a hash table with collision resolution by separate chaining.
virtual ~LinHashBucketVtl()
virtual destructor.
Bucket without virtual destructor for a hash table with collision resolution by separate chaining.
CRTP mixin providing statistics and configuration for chained hash tables.
Definition hashDry.H:985
Common hash table utilities and base classes.
Collection of general-purpose hash functions.
Common operations for open addressing hash tables (CRTP mixin).
Singly linked list implementations with head-tail access.
Table::Bucket Bucket
Definition lin-hash.cc:54
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
const float hash_default_upper_alpha
Definition hash-dry.C:40
void next()
Advance all underlying iterators (bounds-checked).
Definition ah-zip.H:175
const float hash_default_lower_alpha
Definition hash-dry.C:38
DynList< T > maps(const C &c, Op op)
Classic map operation.
const unsigned long DefaultPrime
Default prime number used when no specific size is requested.
Definition primes.C:381
Prime number utilities for hash tables and mathematical operations.
Doubly linked list node with typed data.
Lazy and scalable dynamic array implementation.
#define LINBUCKET_BODY(BUCKETNAME)
Definition tpl_linHash.H:79