Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
tpl_linHash.H
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
50# ifndef TPL_LINHASH_H
51# define TPL_LINHASH_H
52
53# include <iostream>
54# include <primes.H>
55# include <dlink.H>
56# include <tpl_dynArray.H>
57# include <tpl_dnode.H>
58# include <htlist.H>
59# include <hashDry.H>
60# include <hash-fct.H>
61# include <hash-dry.H>
62# include <ah-errors.H>
63# include <ah-concepts.H>
64
65# ifdef N
66# define NBACKUP N
67# undef N
68# endif
69
70# ifdef M
71# define MBACKUP M
72# undef M
73# endif
74
75
76using namespace Aleph;
77
78namespace Aleph {
79
80# define LINBUCKET_BODY(BUCKETNAME) \
81 \
82 Dlink link; \
83 \
84public: \
85 \
86 BUCKETNAME(const BUCKETNAME & bucket) \
87 : Dnode<Key>(bucket) {} \
88 \
89 BUCKETNAME() {} \
90 \
91 BUCKETNAME(const Key & key) \
92 : Dnode<Key>(key) {} \
93 \
94 Key & get_key() noexcept { return this->get_data(); } \
95 \
96 Dlink * get_link() noexcept { return &link; } \
97 \
98 DLINK_TO_BASE(BUCKETNAME, link);
99
100
108 template <typename Key>
109class LinHashBucket : public Dnode<Key>
110{
112};
113
121 template <typename Key>
122class LinHashBucketVtl : public Dnode<Key>
123{
125
127 virtual ~LinHashBucketVtl() {}
128};
129
130
155 template <typename Key, template <class> class BucketType,
159 : public HashStats<GenLinearHashTable<Key, BucketType, Cmp>>
160{
161 friend class HashStats<GenLinearHashTable<Key, BucketType, Cmp>>;
162
163public:
164
167 using Hash_Fct = std::function<size_t(const Key &)>;
168 using Hash_Fct_Ptr = size_t (*) (const Key &);
169
172
173 using Key_Type = Key;
174
175 using Item_Type = Key;
176
177private:
178
180
182
183 static size_t multiply_by_two(size_t n) noexcept { return n << 1; }
184
185 static size_t divide_by_two(size_t n) noexcept { return n >> 1; }
186
189
190protected:
191
194
195private:
196
197 size_t M; // Table size
198 size_t N; // Number of elements in the table
199 size_t busy_slots_counter; // Number of occupied entries
200 bool remove_all_buckets; // Indicates if destructor should free
201 // the buckets
202
203protected:
204
205 float upper_alpha; // upper load factor
206 float lower_alpha; // lower load factor
207
208private:
209
210 size_t p; // index of the list being partitioned (or expanded)
211 size_t l; // number of times the table has been doubled
212 size_t MP; // stores the value p + M
213 size_t MM; // product 2*M
214
215protected:
216
217 mutable size_t len;
218
219private:
220
221 size_t call_hash_fct(const Key & key) const
222 {
223 const auto hash = hash_fct(key);
224 const auto i = hash % M;
225 return i < p ? hash % MM : i;
226 }
227
228 void expand()
229 { // expand the table until load is below upper_alpha
230 for (float alpha = 1.0*N/MP; alpha >= upper_alpha; alpha = 1.0*N/MP)
231 {
232 BucketList * src_list_ptr = table.test(p);
233 if (src_list_ptr != nullptr) // is table[p] written?
234 if (not src_list_ptr->is_empty()) // is table[p] not empty?
235 {
236 BucketList * tgt_list_ptr = nullptr;
237
238 // traverse collision list and move buckets to table[p+M]
239 for (BucketItor it(*src_list_ptr); it.has_curr(); /* nothing */)
240 {
241 Bucket * bucket = static_cast<Bucket*>(it.get_curr());
242
243 it.next_ne(); // advance to the next element in the list
244
245 const Key & key = bucket->get_key();
246 const size_t i = hash_fct(key) % MM;
247 if (i == p) // does this key belong to table[p]?
248 continue; // yes ==> key stays in table[p] ==> next
249
250 if (tgt_list_ptr == nullptr)
251 tgt_list_ptr = &table.touch(MP);
252
253 // bucket does not belong to table[p] but to table[p+m] ==>
254 // remove bucket from table[i] and insert it in table[p+m]
255 bucket->del();
256 tgt_list_ptr->append(bucket);
257 }
258
259 if (src_list_ptr->is_empty()) // did table[p] become empty?
260 --busy_slots_counter; // yes ==> one empty slot
261
262 ++busy_slots_counter; // one new for table[p+M]
263 }
264 ++p;
265 ++MP;
266 if (p == M) // (p == 2*M) should the table size be doubled?
267 { // yes ==> change table size to 2*M
268 ++l; // Number of times the table has been doubled
269 p = 0;
270 MP = M = MM; // assign 2*M to them
272 }
273 }
274 }
275
277 { // contract the table until load is below lower_alpha
278 for (float alpha = (1.0*N)/MP; alpha <= lower_alpha and MP > len;
279 alpha = (1.0*N)/MP)
280 {
281 if (p == 0) // should the table size be halved?
282 { // yes ==> update table size to M/2
283 --l; // Number of times the table has been doubled decreases
284 MM = M; // divide by two
285 M = divide_by_two(M);
286 p = M - 1;
287 }
288 else
289 --p; // no ==> just reduce index p
290
291 --MP;
292 if (MP < table.size()) // Does table[MP] exist?
293 {
295 if (src_list_ptr != nullptr) // does entry for table[p+M] exist?
296 {
297 if (not src_list_ptr->is_empty()) // is table[p+M] empty?
298 { // no ==> merge the lists
299 BucketList & tgt_list = table.touch(p);// allocate table[p]
301 --busy_slots_counter; // table[p+M] became empty
302 }
303 table.cut_ne(MP); // eventually free memory of table[p+M]
304 }
305 }
306 }
307 }
308
309public:
310
312 void set_hash_fct(Hash_Fct fct) noexcept
313 {
314 hash_fct = fct;
315 }
316
318 {
320 }
321
323
324 Cmp & get_compare() noexcept { return cmp; }
325
326 const Cmp & get_compare() const noexcept { return cmp; }
327
329 float current_alpha() const noexcept { return 1.0*N/MP; }
330
331 protected:
332
334 float __lower_alpha, float __upper_alpha,
335 bool __remove_all_buckets, bool /* fake */)
339 p(0), l(0), MP(M), MM(multiply_by_two(M)), len(__len)
340 {
341 ah_length_error_if(M == 0) << "table's length is zero";
342
343 ah_length_error_if(MM > table.max_size()) << "table's length too big";
344
346 << "upper alpha must be greater than lower alpha";
347 }
348
349 public:
350
385
387 {
388 std::swap(table, other.table);
389 entries_list.swap(other.entries_list);
390 std::swap(hash_fct, other.hash_fct);
391 std::swap(cmp, other.cmp);
392 std::swap(M, other.M);
393 std::swap(N, other.N);
394 std::swap(busy_slots_counter, other.busy_slots_counter);
395 std::swap(remove_all_buckets, other.remove_all_buckets);
396 std::swap(upper_alpha, other.upper_alpha);
397 std::swap(lower_alpha, other.lower_alpha);
398 std::swap(p, other.p);
399 std::swap(l, other.l);
400 std::swap(MP, other.MP);
401 std::swap(MM, other.MM);
402 std::swap(len, other.len);
403 }
404
405
409 {
410 while (not entries_list.is_empty())
411 {
412 Bucket * bucket = Bucket::dlink_to_base(entries_list.remove_first_ne());
413 bucket->del(); // remove from the list in the array
414 bucket->get_link()->del(); // remove from entries list
415 delete bucket;
416 }
417
418 M = MP = len;
420 N = p = l = 0;
421 table.cut_ne(len);
422 }
423
429 void clear() noexcept { empty(); }
430
437 bool contains(const Key & key) const noexcept { return search(key) != nullptr; }
438
440 {
442 empty();
443 }
444
445private:
446
447 Bucket *
448 search_in_bucket_list(BucketList * list, const Key & key) const
449 {
450 for (BucketItor it(*list); it.has_curr(); it.next_ne())
451 {
452 Bucket * bucket = static_cast<Bucket*>(it.get_curr());
453 if (cmp (key, bucket->get_key()))
454 return bucket;
455 }
456
457 return nullptr;
458 }
459
460public:
461
465 Bucket * search(const Key & key) const noexcept
466 {
467 auto i = call_hash_fct(key);
468 BucketList * list = table.test(i);
469 if (list == nullptr) // Has table[i] ever been written?
470 return nullptr; // No ==> element is not in the table
471
472 if (list->is_empty())
473 return nullptr;
474
475 return search_in_bucket_list(list, key);
476 }
477
479 const size_t & size() const noexcept { return N; }
480
482 bool is_empty() const noexcept { return N == 0; }
483
485 const size_t & capacity() const noexcept { return MP; }
486
488 const size_t & busy_slots() const noexcept { return busy_slots_counter; }
489
491 const size_t & expansions() const noexcept { return l; }
492
495 Bucket * insert(Bucket * bucket)
496 {
497 const size_t i = call_hash_fct(bucket->get_key());
498 BucketList & list = table.touch(i); // allocates memory for table[i]
499 if (list.is_empty())
501
502 if (search_in_bucket_list(&list, bucket->get_key()) != nullptr)
503 return nullptr; // duplicated key
504
505 list.append(bucket);
506 entries_list.append(bucket->get_link());
507 ++N;
508 expand();
509
510 return bucket;
511 }
512
514 {
515 const size_t i = call_hash_fct(bucket->get_key());
516 BucketList & list = table.touch(i); // allocates memory for table[i]
517 if (list.is_empty())
519
520 Bucket * p = search_in_bucket_list(&list, bucket->get_key());
521 if (p != nullptr)
522 return p; // duplicated key
523
524 list.append(bucket);
525 entries_list.append(bucket->get_link());
526 ++N;
527 expand();
528
529 return bucket;
530 }
531
533 size_t resize(size_t) noexcept { return MP; }
534
535private:
536
537 // This routine deletes bucket from hash table EXCEPT from
538 // entries_list. The end of this routine is to dry the deletion and to
539 // allow remove from other places; ofor instance, from the del()
540 // method of Iterator class
541 Bucket * remove_bucket(Bucket * bucket) noexcept
542 {
543 assert(bucket != nullptr);
544 assert(search(bucket->get_key()) == bucket);
545
546 Bucket * next = static_cast<Bucket*>(bucket->get_next());
547
548 bucket->del(); // remove from collision list
549
550 if (next->is_empty()) // collision list empty?
551 --busy_slots_counter; // yes ==> one empty slot
552
553 --N;
554 contract();
555
556 return bucket;
557 }
558
559public:
560
563 Bucket * remove(Bucket * bucket) noexcept
564 {
565 bucket->get_link()->del(); // remove from entries_list
566 return remove_bucket(bucket);
567 }
568
569 void print()
570 {
571 for (size_t i = 0; i < MP; ++i)
572 {
573 std::cout << "table[" << i << "] = [ ";
574
575 if (table.exist(i))
576 {
577 BucketList & list = table.access(i);
578
579 if (not list.is_empty())
580 for (BucketItor it(list); it.has_curr(); it.next_ne())
581 {
582 Bucket * bucket = static_cast<Bucket*>(it.get_curr());
583 const Key & key = bucket->get_key();
584 std::cout << key << ",";
585 }
586 }
587 std::cout << "]" << '\n';
588 }
589 }
590
592 {
593 private:
594
596 long pos = 0;
597
598 public:
599
602
604 using Item_Type = Bucket *;
605
608 : Dlink::Iterator(const_cast<Dlink &>(table.entries_list)),
609 hash_table(& const_cast<GenLinearHashTable &>(table))
610 {
611 // Empty
612 }
613
615 Iterator() noexcept { /* Empty */ }
616
618 {
619 return Bucket::dlink_to_base(this->Dlink::Iterator::get_curr_ne());
620 }
621
624 {
625 return Bucket::dlink_to_base(this->Dlink::Iterator::get_curr());
626 }
627
629 {
630 Bucket * bucket = Bucket::dlink_to_base(this->Dlink::Iterator::del());
631 return (Bucket *) hash_table->remove_bucket(bucket);
632 }
633
635 {
637 pos++;
638 }
639
640 void next()
641 {
642 this->Dlink::Iterator::next();
643 pos++;
644 }
645
646 void prev()
647 {
648 this->Dlink::Iterator::prev();
649 pos--;
650 }
651
652 long get_pos() const noexcept { return pos; }
653 };
654};
655
656
675template <typename Key, class Cmp = Aleph::equal_to<Key>>
677
678
697 template <typename Key, class Cmp = Aleph::equal_to<Key>>
699
700
701} // end namespace Aleph
702
703# ifdef NBACKUP
704# define N NBACKUP
705# undef NBACKUP
706# endif
707
708# ifdef MBACKUP
709# define M MBACKUP
710# undef MBACKUP
711# endif
712
713# endif // TPL_LINHASH_H
C++20 concepts for constraining comparison functors.
Exception handling system with formatted messages for Aleph-w.
#define ah_length_error_if(C)
Throws std::length_error if condition holds.
Definition ah-errors.H:698
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Definition ah-errors.H:522
Iterator on a list of Dnode objects.
Definition tpl_dnode.H:261
Node belonging to a double circular linked list with header node.
Definition tpl_dnode.H:106
T & get_key() noexcept
Definition tpl_dnode.H:241
Iterator() noexcept
Instantiate an empty iterator.
Bucket * get_curr()
Return the current bucket.
Bucket * Item_Type
The element type returned by get_curr().
Iterator(const GenLinearHashTable &table) noexcept
Instantiate an iterator over the hash table.
Generic linear hash table.
const size_t & capacity() const noexcept
Returns the table capacity.
void swap(GenLinearHashTable &other) noexcept
std::function< size_t(const Key &)> Hash_Fct
The hash function type.
void set_hash_fct(Hash_Fct_Ptr fct) noexcept
DynArray< BucketList > table
void clear() noexcept
Empties the container.
static size_t divide_by_two(size_t n) noexcept
Bucket * remove_bucket(Bucket *bucket) noexcept
Cmp & get_compare() noexcept
const size_t & size() const noexcept
Returns the number of elements in the table.
bool is_empty() const noexcept
return true is table is empty
GenLinearHashTable(size_t len=Primes::DefaultPrime, Hash_Fct_Ptr hash_fct=Aleph::dft_hash_ptr_fct< Key >, Cmp cmp=Cmp(), float lower_alpha=hash_default_lower_alpha, float upper_alpha=hash_default_upper_alpha, bool remove_all_buckets=true, bool with_resize=true)
Instantiate a generic linear hash table.
void empty() noexcept
Empty the entire table.
typename Dnode< Key >::Iterator BucketItor
static size_t multiply_by_two(size_t n) noexcept
const size_t & expansions() const noexcept
Returns the expansion level performed on the table.
size_t call_hash_fct(const Key &key) const
float current_alpha() const noexcept
return the current table load
Bucket * remove(Bucket *bucket) noexcept
Remove bucket from table.
BucketType< Key > Bucket
The bucket type.
const size_t & busy_slots() const noexcept
Returns the number of busy slots in the table.
bool contains(const Key &key) const noexcept
Checks if a key exists in the table.
Bucket * insert(Bucket *bucket)
Insert bucket in the table.
Hash_Fct get_hash_fct() const noexcept
size_t(*)(const Key &) Hash_Fct_Ptr
void set_hash_fct(Hash_Fct fct) noexcept
Set the internal hash function.
Bucket * search_or_insert(Bucket *bucket)
void contract() noexcept
const Cmp & get_compare() const noexcept
GenLinearHashTable(size_t __len, Hash_Fct __hash_fct, Cmp __cmp, float __lower_alpha, float __upper_alpha, bool __remove_all_buckets, bool)
Bucket * search(const Key &key) const noexcept
Search for key in the linear hash table.
size_t resize(size_t) noexcept
Provided for generic programming compatibility.
Bucket * search_in_bucket_list(BucketList *list, const Key &key) const
Bucket with virtual destructor for a hash table with collision resolution by separate chaining.
virtual ~LinHashBucketVtl()
virtual destructor.
Bucket without virtual destructor for a hash table with collision resolution by separate chaining.
CRTP mixin providing statistics and configuration for chained hash tables.
Definition hashDry.H:992
Common hash table utilities and base classes.
Common operations for open addressing hash tables (CRTP mixin).
Singly linked list implementations with head-tail access.
Table::Bucket Bucket
Definition lin-hash.cc:54
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
const float hash_default_upper_alpha
Definition hash-dry.C:40
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
void next()
Advance all underlying iterators (bounds-checked).
Definition ah-zip.H:171
const float hash_default_lower_alpha
Definition hash-dry.C:38
const unsigned long DefaultPrime
Default prime number used when no specific size is requested.
Definition primes.C:381
Prime number utilities for hash tables and mathematical operations.
Doubly linked list node with typed data.
Lazy and scalable dynamic array implementation.
#define LINBUCKET_BODY(BUCKETNAME)
Definition tpl_linHash.H:80