Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
hash_fct_test.cc
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
52#include <gtest/gtest.h>
53#include <hash-fct.H>
54
55#include <array>
56#include <cstring>
57#include <random>
58#include <set>
59#include <string>
60#include <vector>
61
62using namespace Aleph;
63
64class HashTestEnvironment : public ::testing::Environment
65{
66public:
67 void SetUp() override
68 {
69 init_jsw(42);
70 }
71};
72
73// Register the environment to initialize jsw_hash table before any tests run
74testing::Environment* const hash_env =
75 testing::AddGlobalTestEnvironment(new HashTestEnvironment);
76
77// ==================== Consistency Tests ====================
78// Hash functions must produce the same output for the same input
79
80class HashConsistencyTest : public ::testing::Test
81{
82protected:
83 const char* test_string = "test_string_for_hashing";
84 const std::string test_std_string = "test_string_for_hashing";
85 const int test_int = 42;
86 const double test_double = 3.14159265359;
87};
88
90{
91 auto h1 = add_hash(test_string);
92 auto h2 = add_hash(test_string);
93 EXPECT_EQ(h1, h2);
94
95 auto h3 = add_hash(test_std_string);
96 auto h4 = add_hash(test_std_string);
97 EXPECT_EQ(h3, h4);
98 EXPECT_EQ(h1, h3); // const char* and std::string should match
99
100 auto h5 = add_hash(test_int);
101 auto h6 = add_hash(test_int);
102 EXPECT_EQ(h5, h6);
103}
104
106{
107 auto h1 = xor_hash(test_string);
108 auto h2 = xor_hash(test_string);
109 EXPECT_EQ(h1, h2);
110
111 auto h3 = xor_hash(test_std_string);
112 auto h4 = xor_hash(test_std_string);
113 EXPECT_EQ(h3, h4);
114 EXPECT_EQ(h1, h3);
115
116 auto h5 = xor_hash(test_int);
117 auto h6 = xor_hash(test_int);
118 EXPECT_EQ(h5, h6);
119}
120
122{
123 auto h1 = rot_hash(test_string);
124 auto h2 = rot_hash(test_string);
125 EXPECT_EQ(h1, h2);
126
127 auto h3 = rot_hash(test_std_string);
128 EXPECT_EQ(h1, h3);
129
130 auto h5 = rot_hash(test_int);
131 auto h6 = rot_hash(test_int);
132 EXPECT_EQ(h5, h6);
133}
134
136{
137 auto h1 = djb_hash(test_string);
138 auto h2 = djb_hash(test_string);
139 EXPECT_EQ(h1, h2);
140
141 auto h3 = djb_hash(test_std_string);
142 EXPECT_EQ(h1, h3);
143
144 auto h5 = djb_hash(test_int);
145 auto h6 = djb_hash(test_int);
146 EXPECT_EQ(h5, h6);
147}
148
150{
151 auto h1 = sax_hash(test_string);
152 auto h2 = sax_hash(test_string);
153 EXPECT_EQ(h1, h2);
154
155 auto h3 = sax_hash(test_std_string);
156 EXPECT_EQ(h1, h3);
157
158 auto h5 = sax_hash(test_int);
159 auto h6 = sax_hash(test_int);
160 EXPECT_EQ(h5, h6);
161}
162
164{
165 auto h1 = fnv_hash(test_string);
166 auto h2 = fnv_hash(test_string);
167 EXPECT_EQ(h1, h2);
168
169 auto h3 = fnv_hash(test_std_string);
170 EXPECT_EQ(h1, h3);
171
172 auto h5 = fnv_hash(test_int);
173 auto h6 = fnv_hash(test_int);
174 EXPECT_EQ(h5, h6);
175}
176
178{
179 auto h1 = oat_hash(test_string);
180 auto h2 = oat_hash(test_string);
181 EXPECT_EQ(h1, h2);
182
183 auto h3 = oat_hash(test_std_string);
184 EXPECT_EQ(h1, h3);
185
186 auto h5 = oat_hash(test_int);
187 auto h6 = oat_hash(test_int);
188 EXPECT_EQ(h5, h6);
189}
190
192{
193 auto h1 = jsw_hash(test_string);
194 auto h2 = jsw_hash(test_string);
195 EXPECT_EQ(h1, h2);
196
197 auto h3 = jsw_hash(test_std_string);
198 EXPECT_EQ(h1, h3);
199
200 auto h5 = jsw_hash(test_int);
201 auto h6 = jsw_hash(test_int);
202 EXPECT_EQ(h5, h6);
203}
204
206{
207 auto h1 = elf_hash(test_string);
208 auto h2 = elf_hash(test_string);
209 EXPECT_EQ(h1, h2);
210
211 auto h3 = elf_hash(test_std_string);
212 EXPECT_EQ(h1, h3);
213
214 auto h5 = elf_hash(test_int);
215 auto h6 = elf_hash(test_int);
216 EXPECT_EQ(h5, h6);
217}
218
220{
221 unsigned seed = 12345;
222 // Use std::string for consistency - const char* would use template version
223 auto h1 = jen_hash(test_std_string, seed);
224 auto h2 = jen_hash(test_std_string, seed);
225 EXPECT_EQ(h1, h2);
226
227 // Also test via void* API directly
228 auto h3 = jen_hash((void*)test_string, strlen(test_string), seed);
229 auto h4 = jen_hash((void*)test_std_string.c_str(), test_std_string.size(), seed);
230 EXPECT_EQ(h3, h4);
231
232 auto h5 = jen_hash(test_int, seed);
233 auto h6 = jen_hash(test_int, seed);
234 EXPECT_EQ(h5, h6);
235}
236
238{
239 auto h1 = SuperFastHash(test_string);
240 auto h2 = SuperFastHash(test_string);
241 EXPECT_EQ(h1, h2);
242
243 auto h3 = SuperFastHash(test_std_string);
244 EXPECT_EQ(h1, h3);
245
246 auto h5 = SuperFastHash(test_int);
247 auto h6 = SuperFastHash(test_int);
248 EXPECT_EQ(h5, h6);
249}
250
252{
253 unsigned long seed = 42;
254 auto h1 = murmur3hash(test_std_string, seed);
255 auto h2 = murmur3hash(test_std_string, seed);
256 EXPECT_EQ(h1, h2);
257
258 auto h3 = murmur3hash(test_int, seed);
259 auto h4 = murmur3hash(test_int, seed);
260 EXPECT_EQ(h3, h4);
261}
262
264{
265 auto h1 = dft_hash_fct(test_std_string);
266 auto h2 = dft_hash_fct(test_std_string);
267 EXPECT_EQ(h1, h2);
268
269 auto h3 = dft_hash_fct(test_int);
270 auto h4 = dft_hash_fct(test_int);
271 EXPECT_EQ(h3, h4);
272}
273
275{
276 EXPECT_EQ(dft_hash_fct(test_std_string), wyhash_hash(test_std_string));
277 EXPECT_EQ(dft_hash_fct(test_int), wyhash_hash(test_int));
278
279 const auto seeded = dft_hash_fct(test_std_string, 99u);
280 EXPECT_EQ(seeded, wyhash_hash(test_std_string, 99u));
281 EXPECT_EQ(snd_hash_fct(test_std_string),
282 wyhash_hash(test_std_string, Aleph_Snd_Hash_Seed));
283}
284
286{
287 auto h1 = xxhash64_hash(test_string);
288 auto h2 = xxhash64_hash(test_string);
289 EXPECT_EQ(h1, h2);
290
291 auto h3 = xxhash64_hash(test_std_string);
292 EXPECT_EQ(h1, h3);
293
294 auto h4 = xxhash64_hash(test_int, 99);
295 auto h5 = xxhash64_hash(test_int, 99);
296 EXPECT_EQ(h4, h5);
297}
298
300{
301 auto h1 = wyhash_hash(test_string);
302 auto h2 = wyhash_hash(test_string);
303 EXPECT_EQ(h1, h2);
304
305 auto h3 = wyhash_hash(test_std_string);
306 EXPECT_EQ(h1, h3);
307
308 auto h4 = wyhash_hash(test_double, 77);
309 auto h5 = wyhash_hash(test_double, 77);
310 EXPECT_EQ(h4, h5);
311}
312
314{
315 auto h1 = siphash24_hash(test_string);
316 auto h2 = siphash24_hash(test_string);
317 EXPECT_EQ(h1, h2);
318
319 auto h3 = siphash24_hash(test_std_string);
320 EXPECT_EQ(h1, h3);
321
322 auto h4 = siphash24_hash(test_int, 1, 2);
323 auto h5 = siphash24_hash(test_int, 1, 2);
324 EXPECT_EQ(h4, h5);
325}
326
327// ==================== Edge Case Tests ====================
328
329class HashEdgeCaseTest : public ::testing::Test
330{
331protected:
332 const char* empty_string = "";
333 const std::string empty_std_string = "";
334 const char* single_char = "a";
335 std::string large_string;
336
337 void SetUp() override
338 {
339 // Create a large string (1MB)
340 large_string.resize(1024 * 1024, 'x');
341 }
342};
343
345{
346 // Empty strings should hash consistently (though maybe to 0 for simple hashes)
347 EXPECT_EQ(add_hash(empty_string), add_hash(empty_std_string));
348 EXPECT_EQ(xor_hash(empty_string), xor_hash(empty_std_string));
349 EXPECT_EQ(rot_hash(empty_string), rot_hash(empty_std_string));
350 EXPECT_EQ(djb_hash(empty_string), djb_hash(empty_std_string));
351 EXPECT_EQ(sax_hash(empty_string), sax_hash(empty_std_string));
352 EXPECT_EQ(fnv_hash(empty_string), fnv_hash(empty_std_string));
353 EXPECT_EQ(oat_hash(empty_string), oat_hash(empty_std_string));
354 EXPECT_EQ(jsw_hash(empty_string), jsw_hash(empty_std_string));
355 EXPECT_EQ(elf_hash(empty_string), elf_hash(empty_std_string));
356 EXPECT_EQ(SuperFastHash(empty_string), SuperFastHash(empty_std_string));
357}
358
360{
361 const std::string single_std = "a";
362
363 // All hashes should produce non-zero for single char 'a'
364 EXPECT_NE(add_hash(single_char), 0u);
365 EXPECT_NE(xor_hash(single_char), 0u);
366 EXPECT_NE(rot_hash(single_char), 0u);
367 EXPECT_NE(djb_hash(single_char), 0u);
368 EXPECT_NE(sax_hash(single_char), 0u);
369 EXPECT_NE(fnv_hash(single_char), 0u);
370 EXPECT_NE(oat_hash(single_char), 0u);
371 EXPECT_NE(jsw_hash(single_char), 0u);
372 EXPECT_NE(elf_hash(single_char), 0u);
373 EXPECT_NE(SuperFastHash(single_char), 0u);
374
375 // Verify consistency
376 EXPECT_EQ(add_hash(single_char), add_hash(single_std));
377}
378
380{
381 // Large strings should hash correctly
382 auto h1 = fnv_hash(large_string);
383 auto h2 = fnv_hash(large_string);
384 EXPECT_EQ(h1, h2);
385
386 auto h3 = SuperFastHash(large_string);
387 auto h4 = SuperFastHash(large_string);
388 EXPECT_EQ(h3, h4);
389
390 auto h5 = oat_hash(large_string);
391 auto h6 = oat_hash(large_string);
392 EXPECT_EQ(h5, h6);
393
394 auto h7 = murmur3hash(large_string, 42ul);
395 auto h8 = murmur3hash(large_string, 42ul);
396 EXPECT_EQ(h7, h8);
397
398 auto h9 = xxhash64_hash(large_string, 42);
399 auto h10 = xxhash64_hash(large_string, 42);
400 EXPECT_EQ(h9, h10);
401
402 auto h11 = wyhash_hash(large_string, 42);
403 auto h12 = wyhash_hash(large_string, 42);
404 EXPECT_EQ(h11, h12);
405
406 auto h13 = siphash24_hash(large_string);
407 auto h14 = siphash24_hash(large_string);
408 EXPECT_EQ(h13, h14);
409}
410
412{
413 // Test hashing data with embedded null bytes
414 char data1[] = {1, 0, 2, 0, 3};
415 char data2[] = {1, 0, 2, 0, 3};
416
422}
423
424// ==================== Different Inputs Produce Different Hashes ====================
425
426class HashDifferenceTest : public ::testing::Test
427{
428};
429
431{
432 const char* s1 = "hello";
433 const char* s2 = "world";
434 const char* s3 = "hello1"; // Very similar to s1
435
436 // Production-quality hashes should distinguish these
439
442
445
450}
451
465
467{
468 const std::string key = "test_key";
469
470 auto h1 = jen_hash(key, 1);
471 auto h2 = jen_hash(key, 2);
472 auto h3 = jen_hash(key, 100);
473
474 EXPECT_NE(h1, h2);
475 EXPECT_NE(h1, h3);
476 EXPECT_NE(h2, h3);
477
478 auto m1 = murmur3hash(key, 1ul);
479 auto m2 = murmur3hash(key, 2ul);
480 auto m3 = murmur3hash(key, 100ul);
481
482 EXPECT_NE(m1, m2);
483 EXPECT_NE(m1, m3);
484 EXPECT_NE(m2, m3);
485
486 auto x1 = xxhash64_hash(key, 1);
487 auto x2 = xxhash64_hash(key, 2);
488 EXPECT_NE(x1, x2);
489
490 auto w1 = wyhash_hash(key, 1);
491 auto w2 = wyhash_hash(key, 2);
492 EXPECT_NE(w1, w2);
493
494 auto s1 = siphash24_hash(key, 1, 2);
495 auto s2 = siphash24_hash(key, 3, 4);
496 EXPECT_NE(s1, s2);
497}
498
499// ==================== Void Pointer API Tests ====================
500
501class HashVoidPtrTest : public ::testing::Test
502{
503};
504
506{
507 int arr1[] = {1, 2, 3, 4, 5};
508 int arr2[] = {1, 2, 3, 4, 5};
509 int arr3[] = {1, 2, 3, 4, 6}; // Different
510
511 EXPECT_EQ(add_hash(arr1, sizeof(arr1)), add_hash(arr2, sizeof(arr2)));
512 EXPECT_NE(add_hash(arr1, sizeof(arr1)), add_hash(arr3, sizeof(arr3)));
513
514 EXPECT_EQ(fnv_hash(arr1, sizeof(arr1)), fnv_hash(arr2, sizeof(arr2)));
515 EXPECT_NE(fnv_hash(arr1, sizeof(arr1)), fnv_hash(arr3, sizeof(arr3)));
516
517 EXPECT_EQ(oat_hash(arr1, sizeof(arr1)), oat_hash(arr2, sizeof(arr2)));
518 EXPECT_EQ(SuperFastHash(arr1, sizeof(arr1)), SuperFastHash(arr2, sizeof(arr2)));
519 // jen_hash with void* requires explicit cast for arrays and explicit initval to avoid ambiguity with template overload
520 EXPECT_EQ(jen_hash((void*)arr1, sizeof(arr1), Default_Hash_Seed), jen_hash((void*)arr2, sizeof(arr2), Default_Hash_Seed));
521}
522
524{
525 // Use a packed struct or memset to ensure no padding issues
526 struct TestStruct
527 {
528 int a;
529 double b;
530 char c;
531 };
532
533 // Zero-initialize to avoid uninitialized padding bytes
534 TestStruct s1, s2, s3;
535 memset(&s1, 0, sizeof(s1));
536 memset(&s2, 0, sizeof(s2));
537 memset(&s3, 0, sizeof(s3));
538 s1.a = 1; s1.b = 2.5; s1.c = 'x';
539 s2.a = 1; s2.b = 2.5; s2.c = 'x';
540 s3.a = 1; s3.b = 2.5; s3.c = 'y';
541
542 EXPECT_EQ(fnv_hash(&s1, sizeof(s1)), fnv_hash(&s2, sizeof(s2)));
543 EXPECT_NE(fnv_hash(&s1, sizeof(s1)), fnv_hash(&s3, sizeof(s3)));
544
545 EXPECT_EQ(oat_hash(&s1, sizeof(s1)), oat_hash(&s2, sizeof(s2)));
546 EXPECT_EQ(SuperFastHash(&s1, sizeof(s1)), SuperFastHash(&s2, sizeof(s2)));
547}
548
549// ==================== Template API Tests ====================
550
551class HashTemplateTest : public ::testing::Test
552{
553};
554
556{
557 int val = 12345;
558
559 // Template versions should work
560 auto h1 = add_hash(val);
561 auto h2 = add_hash(&val, sizeof(val));
562 EXPECT_EQ(h1, h2);
563
564 auto h3 = fnv_hash(val);
565 auto h4 = fnv_hash(&val, sizeof(val));
566 EXPECT_EQ(h3, h4);
567}
568
570{
571 double val = 3.14159;
572
573 auto h1 = fnv_hash(val);
574 auto h2 = fnv_hash(&val, sizeof(val));
575 EXPECT_EQ(h1, h2);
576
577 auto h3 = oat_hash(val);
578 auto h4 = oat_hash(&val, sizeof(val));
579 EXPECT_EQ(h3, h4);
580}
581
583{
584 long long val = 9223372036854775807LL;
585
586 auto h1 = fnv_hash(val);
587 auto h2 = fnv_hash(&val, sizeof(val));
588 EXPECT_EQ(h1, h2);
589
590 auto h3 = SuperFastHash(val);
591 auto h4 = SuperFastHash(&val, sizeof(val));
592 EXPECT_EQ(h3, h4);
593}
594
595// ==================== Pair Hash Functions ====================
596
598{
599 std::pair<int, int> p1{1, 2};
600 std::pair<int, int> p2{1, 2};
601 std::pair<int, int> p3{2, 1};
602
603 auto h1 = pair_dft_hash_fct(p1);
604 auto h2 = pair_dft_hash_fct(p2);
605 auto h3 = pair_dft_hash_fct(p3);
606
607 EXPECT_EQ(h1, h2);
608 // hash_combine is non-commutative: {1,2} and {2,1} must differ
609 EXPECT_NE(h1, h3);
610}
611
613{
614 std::pair<std::string, std::string> p1{"hello", "world"};
615 std::pair<std::string, std::string> p2{"hello", "world"};
616
617 auto h1 = pair_snd_hash_fct(p1);
618 auto h2 = pair_snd_hash_fct(p2);
619
620 EXPECT_EQ(h1, h2);
621}
622
624{
625 std::pair<char, std::string> p1{'a', "world"};
626 std::pair<std::string, char> p2{"hello", 'z'};
627
630}
631
633{
634 std::pair<char, std::string> p1{'a', "world"};
635 std::pair<std::string, char> p2{"hello", 'z'};
636
637 const auto h1 = pair_dft_hash_fct(p1);
639 const auto h3 = pair_snd_hash_fct(p2);
641
642 EXPECT_EQ(h1, h2);
643 EXPECT_EQ(h3, h4);
644}
645
646// ==================== Aleph_Hash / ADL Customization ====================
647
648namespace UserTypeNS
649{
650 // A non-trivially-copyable type that opts into Aleph hashing via ADL.
651 struct Point3D
652 {
653 double x, y, z;
654 std::string label; // makes it non-trivially-copyable
655 Point3D(double x, double y, double z, std::string l)
656 : x(x), y(y), z(z), label(std::move(l)) {}
657 };
658
659 inline size_t aleph_hash_value(const Point3D & p) noexcept
660 {
661 size_t h = Aleph::dft_hash_fct(p.label);
665 return h;
666 }
667} // namespace UserTypeNS
668
670{
671 UserTypeNS::Point3D p1{1.0, 2.0, 3.0, "origin"};
672 UserTypeNS::Point3D p2{1.0, 2.0, 3.0, "origin"};
673
676}
677
684
692
694{
695 // Point3D has aleph_hash_value → HashableByADL should be true
697 // int has no aleph_hash_value → HashableByADL should be false
699 // std::string has no aleph_hash_value → HashableByADL should be false
701}
702
703// ==================== Distribution Quality Tests ====================
704
705class HashDistributionTest : public ::testing::Test
706{
707protected:
708 static constexpr size_t NUM_KEYS = 10000;
709 std::vector<std::string> keys;
710
711 void SetUp() override
712 {
713 std::mt19937 gen(42); // Fixed seed for reproducibility
714 std::uniform_int_distribution<> len_dist(5, 20);
715 std::uniform_int_distribution<> char_dist('a', 'z');
716
717 for (size_t i = 0; i < NUM_KEYS; ++i)
718 {
719 int len = len_dist(gen);
720 std::string s;
721 s.reserve(len);
722 for (int j = 0; j < len; ++j)
723 s += static_cast<char>(char_dist(gen));
724 keys.push_back(std::move(s));
725 }
726 }
727
728 // Measure hash uniqueness (collision ratio without modulo)
729 // Good hash functions should have very few or no collisions
730 double measureHashUniqueness(const std::vector<size_t>& hashes)
731 {
732 std::set<size_t> unique_hashes(hashes.begin(), hashes.end());
733 return static_cast<double>(unique_hashes.size()) /
734 static_cast<double>(hashes.size());
735 }
736};
737
738// Test that production-quality hash functions produce mostly unique hashes
739// (without modulo). We expect >90% uniqueness for 10000 distinct random strings.
741{
742 std::vector<size_t> hashes;
743 hashes.reserve(NUM_KEYS);
744 for (const auto& key : keys)
745 hashes.push_back(fnv_hash(key));
746
747 double uniqueness = measureHashUniqueness(hashes);
748 EXPECT_GT(uniqueness, 0.90) << "FNV hash uniqueness: " << uniqueness;
749}
750
752{
753 std::vector<size_t> hashes;
754 hashes.reserve(NUM_KEYS);
755 for (const auto& key : keys)
756 hashes.push_back(oat_hash(key));
757
758 double uniqueness = measureHashUniqueness(hashes);
759 EXPECT_GT(uniqueness, 0.90) << "OAT hash uniqueness: " << uniqueness;
760}
761
763{
764 std::vector<size_t> hashes;
765 hashes.reserve(NUM_KEYS);
766 for (const auto& key : keys)
767 hashes.push_back(SuperFastHash(key));
768
769 double uniqueness = measureHashUniqueness(hashes);
770 EXPECT_GT(uniqueness, 0.90) << "SuperFastHash uniqueness: " << uniqueness;
771}
772
774{
775 std::vector<size_t> hashes;
776 hashes.reserve(NUM_KEYS);
777 for (const auto& key : keys)
778 hashes.push_back(jen_hash(key, 0));
779
780 double uniqueness = measureHashUniqueness(hashes);
781 EXPECT_GT(uniqueness, 0.90) << "Jenkins hash uniqueness: " << uniqueness;
782}
783
785{
786 std::vector<size_t> hashes;
787 hashes.reserve(NUM_KEYS);
788 for (const auto& key : keys)
789 hashes.push_back(murmur3hash(key, 42ul));
790
791 double uniqueness = measureHashUniqueness(hashes);
792 EXPECT_GT(uniqueness, 0.90) << "MurmurHash3 uniqueness: " << uniqueness;
793}
794
796{
797 std::vector<size_t> hashes;
798 hashes.reserve(NUM_KEYS);
799 for (const auto& key : keys)
800 hashes.push_back(dft_hash_fct(key));
801
802 double uniqueness = measureHashUniqueness(hashes);
803 EXPECT_GT(uniqueness, 0.90) << "dft_hash_fct uniqueness: " << uniqueness;
804}
805
807{
808 std::vector<size_t> hashes;
809 hashes.reserve(NUM_KEYS);
810 for (const auto& key : keys)
811 hashes.push_back(xxhash64_hash(key, 42));
812
813 double uniqueness = measureHashUniqueness(hashes);
814 EXPECT_GT(uniqueness, 0.90) << "xxhash64 uniqueness: " << uniqueness;
815}
816
818{
819 std::vector<size_t> hashes;
820 hashes.reserve(NUM_KEYS);
821 for (const auto& key : keys)
822 hashes.push_back(wyhash_hash(key, 42));
823
824 double uniqueness = measureHashUniqueness(hashes);
825 EXPECT_GT(uniqueness, 0.90) << "wyhash uniqueness: " << uniqueness;
826}
827
829{
830 std::vector<size_t> hashes;
831 hashes.reserve(NUM_KEYS);
832 for (const auto& key : keys)
833 hashes.push_back(siphash24_hash(key));
834
835 double uniqueness = measureHashUniqueness(hashes);
836 EXPECT_GT(uniqueness, 0.90) << "siphash24 uniqueness: " << uniqueness;
837}
838
839// ==================== Avalanche Property Tests ====================
840// Small changes in input should cause large changes in output
841
842class HashAvalancheTest : public ::testing::Test
843{
844protected:
845 // Count bit differences between two hash values
846 static int bitDifference(size_t h1, size_t h2)
847 {
848 size_t diff = h1 ^ h2;
849 int count = 0;
850 while (diff)
851 {
852 count += diff & 1;
853 diff >>= 1;
854 }
855 return count;
856 }
857};
858
860{
861 // Changing a single bit should cause ~50% of output bits to change
862 // for a good hash function
863 unsigned char data1[16] = {0};
864 unsigned char data2[16] = {0};
865 data2[0] = 1; // Single bit difference
866
867 auto h1 = jen_hash(data1, 16);
868 auto h2 = jen_hash(data2, 16);
869
870 int bit_diff = bitDifference(h1, h2);
871 // Good hash should have at least 20% of bits different
872 EXPECT_GT(bit_diff, (int)(sizeof(size_t) * 8 * 0.2))
873 << "Jenkins hash has poor avalanche property";
874}
875
877{
878 unsigned char data1[16] = {0};
879 unsigned char data2[16] = {0};
880 data2[0] = 1;
881
882 auto h1 = murmur3hash(std::string((char*)data1, 16), 42ul);
883 auto h2 = murmur3hash(std::string((char*)data2, 16), 42ul);
884
885 int bit_diff = bitDifference(h1, h2);
886 EXPECT_GT(bit_diff, (int)(sizeof(size_t) * 8 * 0.2))
887 << "MurmurHash3 has poor avalanche property";
888}
889
891{
892 unsigned char data1[16] = {0};
893 unsigned char data2[16] = {0};
894 data2[0] = 1;
895
896 auto h1 = xxhash64_hash(data1, 16, 42);
897 auto h2 = xxhash64_hash(data2, 16, 42);
898
899 int bit_diff = bitDifference(h1, h2);
900 EXPECT_GT(bit_diff, (int)(sizeof(size_t) * 8 * 0.2))
901 << "xxhash64 has poor avalanche property";
902}
903
905{
906 unsigned char data1[16] = {0};
907 unsigned char data2[16] = {0};
908 data2[0] = 1;
909
910 auto h1 = wyhash_hash(data1, 16, 42);
911 auto h2 = wyhash_hash(data2, 16, 42);
912
913 int bit_diff = bitDifference(h1, h2);
914 EXPECT_GT(bit_diff, (int)(sizeof(size_t) * 8 * 0.2))
915 << "wyhash has poor avalanche property";
916}
917
919{
920 unsigned char data1[16] = {0};
921 unsigned char data2[16] = {0};
922 data2[0] = 1;
923
924 auto h1 = siphash24_hash(data1, 16);
925 auto h2 = siphash24_hash(data2, 16);
926
927 int bit_diff = bitDifference(h1, h2);
928 EXPECT_GT(bit_diff, (int)(sizeof(size_t) * 8 * 0.2))
929 << "siphash24 has poor avalanche property";
930}
931
933{
934 unsigned char data1[16] = {0};
935 unsigned char data2[16] = {0};
936 data2[0] = 1;
937
938 auto h1 = oat_hash(data1, 16);
939 auto h2 = oat_hash(data2, 16);
940
941 int bit_diff = bitDifference(h1, h2);
942 EXPECT_GT(bit_diff, (int)(sizeof(size_t) * 8 * 0.2))
943 << "One-at-a-Time hash has poor avalanche property";
944}
945
946// ==================== Known Value Tests ====================
947// Test against known hash values for regression testing
948
950{
951 // "abc" = 97 + 98 + 99 = 294
952 EXPECT_EQ(add_hash("abc"), 294u);
953}
954
956{
957 // "abc" = 97 ^ 98 ^ 99 = 96
958 EXPECT_EQ(xor_hash("abc"), 96u);
959}
960
962{
963 // Empty string with fnv_hash should return the FNV offset basis
964 // Since our implementation iterates while (*p), empty string returns
965 // initial value
966 EXPECT_EQ(fnv_hash(""), 2166136261u);
967}
968
970{
971 static constexpr std::array<std::uint64_t, 16> expected =
972 {
973 0x726fdb47dd0e0e31ULL, 0x74f839c593dc67fdULL,
974 0x0d6c8009d9a94f5aULL, 0x85676696d7fb7e2dULL,
975 0xcf2794e0277187b7ULL, 0x18765564cd99a68dULL,
976 0xcbc9466e58fee3ceULL, 0xab0200f58b01d137ULL,
977 0x93f5f5799a932462ULL, 0x9e0082df0ba9e4b0ULL,
978 0x7a5dbbc594ddb9f3ULL, 0xf4b32f46226bada7ULL,
979 0x751e8fbc860ee5fbULL, 0x14ea5627c0843d90ULL,
980 0xf723ca908e7af2eeULL, 0xa129ca6149be45e5ULL
981 };
982
983 std::array<unsigned char, 16> msg = {};
984 for (size_t i = 0; i < msg.size(); ++i)
985 msg[i] = static_cast<unsigned char>(i);
986
987 for (size_t len = 0; len < expected.size(); ++len)
988 {
989 EXPECT_EQ(siphash24_hash(msg.data(), len,
990 0x0706050403020100ULL,
991 0x0f0e0d0c0b0a0908ULL),
992 static_cast<size_t>(expected[len]));
993 }
994}
995
996// ==================== Seeded Hash Tests ====================
997
999{
1000 const std::string data = "test";
1001 auto h1 = jen_hash(data, Default_Hash_Seed);
1002 auto h2 = jen_hash((void*)data.c_str(), data.size(), Default_Hash_Seed);
1003 EXPECT_EQ(h1, h2);
1004}
1005
1007{
1008 const std::string key = "test_key";
1009
1010 auto h1 = dft_hash_fct(key, 1ul);
1011 auto h2 = dft_hash_fct(key, 2ul);
1012
1013 EXPECT_NE(h1, h2); // Different seeds should produce different hashes
1014}
1015
1016// ==================== Type Safety Tests ====================
1017
1019{
1020 int signed_val = -1;
1021 unsigned int unsigned_val = static_cast<unsigned int>(-1);
1022
1023 // These should hash the same as they have the same bit representation
1024 auto h1 = fnv_hash(&signed_val, sizeof(signed_val));
1025 auto h2 = fnv_hash(&unsigned_val, sizeof(unsigned_val));
1026 EXPECT_EQ(h1, h2);
1027}
1028
1030{
1031 // Test that const char* and std::string versions produce same hashes
1032 // Note: char arr[] uses template version which includes null terminator,
1033 // so we test const char* and std::string which both hash the string content
1034 const char* ptr = "hello";
1035 std::string str = "hello";
1036
1037 EXPECT_EQ(fnv_hash(ptr), fnv_hash(str));
1038 EXPECT_EQ(oat_hash(ptr), oat_hash(str));
1039 EXPECT_EQ(djb_hash(ptr), djb_hash(str));
1040 EXPECT_EQ(sax_hash(ptr), sax_hash(str));
1043}
1044
1046{
1047 int value = 42;
1048 int * ptr = &value;
1049
1051 EXPECT_EQ(dft_hash_fct(ptr), SuperFastHash(&ptr, sizeof(ptr)));
1052
1054 EXPECT_EQ(snd_hash_fct(ptr), SuperFastHash(&ptr, sizeof(ptr), Aleph_Snd_Hash_Seed));
1055}
1056
1057// ==================== Performance Sanity Checks ====================
1058// These are not rigorous benchmarks but ensure hashes complete in
1059// reasonable time
1060
1062{
1063 // 10MB of data
1064 std::string large_data(10 * 1024 * 1024, 'x');
1065
1066 // These should complete without hanging
1067 auto h1 = fnv_hash(large_data);
1068 auto h2 = oat_hash(large_data);
1069 auto h3 = SuperFastHash(large_data);
1070 auto h4 = murmur3hash(large_data, 42ul);
1071 auto h5 = xxhash64_hash(large_data, 42);
1072 auto h6 = wyhash_hash(large_data, 42);
1074
1075 EXPECT_NE(h1, 0u);
1076 EXPECT_NE(h2, 0u);
1077 EXPECT_NE(h3, 0u);
1078 EXPECT_NE(h4, 0u);
1079 EXPECT_NE(h5, 0u);
1080 EXPECT_NE(h6, 0u);
1081 EXPECT_NE(h7, 0u);
1082}
1083
1085{
1086 // Hash 100,000 small integers
1087 size_t sum = 0;
1088 for (int i = 0; i < 100000; ++i)
1089 sum += fnv_hash(i);
1090
1091 EXPECT_NE(sum, 0u);
1092}
long double h
Definition btreepic.C:154
static int bitDifference(size_t h1, size_t h2)
const char * test_string
const std::string test_std_string
const double test_double
void SetUp() override
double measureHashUniqueness(const std::vector< size_t > &hashes)
static constexpr size_t NUM_KEYS
std::vector< std::string > keys
const std::string empty_std_string
void SetUp() override
const char * single_char
std::string large_string
const char * empty_string
void SetUp() override
Concept: T provides a semantic hash via ADL.
Definition hash-fct.H:869
#define TEST(name)
size_t sax_hash(const void *key, const size_t len) noexcept
Shift-Add-XOR hash (SAX hash)
Definition hash-fct.H:183
size_t SuperFastHash(const void *key, size_t len, std::uint32_t seed=0) noexcept
Paul Hsieh super fast hash function.
Definition hash-fct.H:408
size_t add_hash(const void *key, const size_t len) noexcept
Additive hash.
Definition hash-fct.H:101
size_t jsw_hash(const void *key, size_t len) noexcept
JSW hash (Julienne Walker)
Definition hash-fct.C:247
size_t xxhash64_hash(const void *key, size_t len, std::uint64_t seed) noexcept
xxHash64 from the xxHash family.
Definition hash-fct.C:611
size_t djb_hash(const void *key, const size_t len) noexcept
Bernstein's hash (DJB hash)
Definition hash-fct.H:163
size_t jen_hash(const void *key, size_t length, unsigned initval) noexcept
Jenkins hash (lookup3)
Definition hash-fct.C:325
size_t xor_hash(const void *key, const size_t len) noexcept
XOR hash.
Definition hash-fct.H:122
size_t fnv_hash(const void *key, const size_t len) noexcept
FNV-1a hash.
Definition hash-fct.H:203
size_t siphash24_hash(const void *key, size_t len, std::uint64_t key0, std::uint64_t key1) noexcept
SipHash-2-4 keyed hash.
Definition hash-fct.C:766
void init_jsw() noexcept
Initializes the randomized lookup table used by jsw_hash().
Definition hash-fct.C:219
size_t rot_hash(const void *key, const size_t len) noexcept
Rotating hash.
Definition hash-fct.H:143
size_t wyhash_hash(const void *key, size_t len, std::uint64_t seed) noexcept
wyhash non-cryptographic hash.
Definition hash-fct.C:694
size_t elf_hash(const void *key, const size_t len) noexcept
ELF hash.
Definition hash-fct.H:279
size_t oat_hash(const void *key, const size_t len) noexcept
One-at-a-Time hash (OAT hash)
Definition hash-fct.H:223
TEST_F(HashConsistencyTest, AddHashConsistency)
testing::Environment *const hash_env
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
size_t pair_snd_hash_fct(const std::pair< K1, K2 > &p) noexcept
Definition hash-fct.H:1134
static constexpr std::uint32_t Aleph_Snd_Hash_Seed
Fixed seed used by the secondary hash function.
Definition hash-fct.H:979
size_t snd_hash_fct(const Key &key) noexcept
Secondary default hash: different distribution from dft_hash_fct.
Definition hash-fct.H:1081
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
bool diff(const C1 &c1, const C2 &c2, Eq e=Eq())
Check if two containers differ.
size_t dft_hash_fct(const Key &key) noexcept
Primary default hash: best speed/quality trade-off.
Definition hash-fct.H:1003
void hash_combine(size_t &seed, size_t h) noexcept
Non-commutative hash combiner (Boost-style golden-ratio mix).
Definition hash-fct.H:850
size_t snd_hash_ptr_fct(const Key &key) noexcept
Definition hash-fct.H:1104
size_t pair_dft_hash_fct(const std::pair< K1, K2 > &p) noexcept
Definition hash-fct.H:1121
const unsigned Default_Hash_Seed
Definition hash-fct.C:45
size_t murmur3hash(const Key &key, std::uint32_t seed)
Definition hash-fct.H:334
size_t dft_hash_ptr_fct(const Key &key) noexcept
Definition hash-fct.H:1055
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
T sum(const Container &container, const T &init=T{})
Compute sum of all elements.
size_t aleph_hash_value(const Point3D &p) noexcept
STL namespace.
Point3D(double x, double y, double z, std::string l)
int keys[]
ValueArg< size_t > seed
Definition testHash.C:48
DynList< int > l