Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
hash_validation_test.cc
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9
10 Copyright (c) 2002-2026 Leandro Rabindranath Leon
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in all
20 copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 SOFTWARE.
29*/
30
48# include <gtest/gtest.h>
49# include <hash-fct.H>
50
51# include <algorithm>
52# include <array>
53# include <bit>
54# include <cmath>
55# include <cstdint>
56# include <cstring>
57# include <iomanip>
58# include <iostream>
59# include <numeric>
60# include <random>
61# include <sstream>
62# include <string>
63# include <string_view>
64# include <unordered_set>
65# include <vector>
66
67# include <tpl_array.H>
68
69using namespace Aleph;
70
71// ======================================================================
72// Infrastructure
73// ======================================================================
74
75namespace
76{
77
// Uniform descriptor for one hash function under test: a display name,
// a function pointer normalized to the (buffer, length) signature, and a
// flag marking whether the function must satisfy the strict quality
// thresholds (only `strong` candidates are EXPECTed against bounds).
struct HashFn
{
  const char * name = "";                            // human-readable label used in logs/traces
  size_t (* hash)(const void *, size_t) = nullptr;   // normalized entry point (see Wrap::)
  bool is_strong = false;                            // true => subject to strict statistical thresholds
};
84
// Wrappers that normalize the interface to (const void *, size_t) → size_t.
// Seeded functions are pinned to fixed seeds so every run is reproducible.
namespace Wrap
{
  size_t add (const void * p, size_t len) { return add_hash(p, len); }
  size_t xor_ (const void * p, size_t len) { return xor_hash(p, len); }
  size_t rot (const void * p, size_t len) { return rot_hash(p, len); }
  size_t djb (const void * p, size_t len) { return djb_hash(p, len); }
  size_t sax (const void * p, size_t len) { return sax_hash(p, len); }
  size_t fnv (const void * p, size_t len) { return fnv_hash(p, len); }
  size_t oat (const void * p, size_t len) { return oat_hash(p, len); }
  size_t jsw (const void * p, size_t len) { return jsw_hash(p, len); }
  size_t elf (const void * p, size_t len) { return elf_hash(p, len); }
  size_t jen (const void * p, size_t len)
  { return jen_hash(p, len, Default_Hash_Seed); }
  size_t sfh (const void * p, size_t len) { return SuperFastHash(p, len); }
  // NOTE(review): murmur3hash apparently takes a std::string, hence the
  // per-call copy of the buffer — confirm against hash-fct.H.
  size_t murmur3(const void * p, size_t len)
  { return murmur3hash(std::string(static_cast<const char *>(p), len), 42u); }
  size_t xxh64 (const void * p, size_t len) { return xxhash64_hash(p, len, 42); }
  size_t wyh (const void * p, size_t len) { return wyhash_hash(p, len, 42); }
  size_t sip24 (const void * p, size_t len) { return siphash24_hash(p, len); }
} // namespace Wrap
106
107// Profile classification mirrors hash_statistical_test.cc.
109{
110 return {
111 {"murmur3hash", Wrap::murmur3, true},
112 {"xxhash64_hash", Wrap::xxh64, true},
113 {"wyhash_hash", Wrap::wyh, true},
114 {"siphash24_hash", Wrap::sip24, true},
115 };
116}
117
119{
120 return {
121 {"SuperFastHash", Wrap::sfh, false},
122 {"fnv_hash", Wrap::fnv, false},
123 {"oat_hash", Wrap::oat, false},
124 {"djb_hash", Wrap::djb, false},
125 };
126}
127
128// Legacy / weak functions — included in the ranking but not subject to
129// quality thresholds (their profile is Weak in hash_statistical_test.cc).
131{
132 return {
133 {"add_hash", Wrap::add, false},
134 {"xor_hash", Wrap::xor_, false},
135 {"rot_hash", Wrap::rot, false},
136 {"sax_hash", Wrap::sax, false},
137 {"jsw_hash", Wrap::jsw, false},
138 {"elf_hash", Wrap::elf, false},
139 {"jen_hash", Wrap::jen, false},
140 };
141}
142
144{
145 auto out = strong_candidates();
146 for (const auto & c : general_candidates())
147 out.append(c);
148 for (const auto & c : weak_candidates())
149 out.append(c);
150 return out;
151}
152
153// Generate N random keys of fixed length.
// Build `n` pseudo-random keys of exactly `len` bytes each. The generator
// is seeded explicitly, so the same (n, len, seed) triple always yields the
// same key set — every test in this file depends on that reproducibility.
std::vector<std::string> random_keys(size_t n, size_t len, std::uint64_t seed)
{
  std::mt19937_64 gen(seed);
  std::uniform_int_distribution<int> octet(0, 255);   // full byte range

  std::vector<std::string> keys;
  keys.reserve(n);
  while (keys.size() < n)
    {
      std::string key;
      key.reserve(len);
      for (size_t b = 0; b < len; ++b)
        key.push_back(static_cast<char>(octet(gen)));
      keys.push_back(std::move(key));
    }
  return keys;
}
169
170// Effective output bits (how many bits are actually exercised).
171size_t effective_bits(const HashFn & fn,
172 const std::vector<std::string> & keys)
173{
174 size_t mask = 0;
175 for (const auto & k : keys)
176 mask |= fn.hash(k.data(), k.size());
177 return std::max<size_t>(std::bit_width(mask), 1);
178}
179
180// ──────────────────────────────────────────────────────────────────────
181// Ranking infrastructure
182// ──────────────────────────────────────────────────────────────────────
183
// Per-function summary of every quality measurement, plus the composite
// ranking score. All metric fields are initialized to their worst value so
// an unmeasured function sinks to the bottom of the ranking.
struct RankMetrics
{
  std::string name;
  bool is_strong = false;
  bool is_weak = false; // true for known weak-profile functions
  double sac_max_bias = 1.0; // lower is better
  double sac_avg_bias = 1.0;
  double bic_max_corr = 1.0; // lower is better
  double diff_worst = 1.0; // max |mean - bits/2| / (bits/2)
  double multireschi = 999.0; // max reduced chi-square across resolutions
  double adv_chi_ctr = 999.0; // chi on counter keys
  double adv_chi_pfx = 999.0; // chi on long-prefix keys
  bool align_ok = false; // same hash at every buffer alignment
  double score = 999.0; // composite (lower is better)
};
199
200// Lower-is-better composite score used for ranking.
201// Weights are chosen so that SAC/BIC/differential carry the most weight
202// (quality) while adversarial chi and multi-res chi contribute equally.
203double ranking_score(const RankMetrics & m)
204{
205 // Penalise if alignment is broken: add a large constant.
206 const double align_penalty = m.align_ok ? 0.0 : 10.0;
207
208 return 3.0 * m.sac_max_bias
209 + 2.0 * m.sac_avg_bias
210 + 2.0 * m.bic_max_corr
211 + 2.0 * m.diff_worst
212 + 1.0 * m.multireschi
213 + 0.5 * m.adv_chi_ctr
214 + 0.5 * m.adv_chi_pfx
216}
217
// Compute compact ranking metrics for a single function. This is a scaled-
// down, single-pass version of the individual TEST_F suites below, producing
// one RankMetrics record per hash function.
RankMetrics compute_rank_metrics(const HashFn & fn)
{
  // Weak-profile set must match all_tested() entries flagged as weak.
  static constexpr std::string_view weak_names[] = {
    "add_hash", "xor_hash", "rot_hash", "sax_hash",
    "jsw_hash", "elf_hash", "jen_hash"
  };

  RankMetrics rm;
  rm.name = fn.name;
  rm.is_strong = fn.is_strong;
  rm.is_weak = std::any_of(std::begin(weak_names), std::end(weak_names),
                           [&](std::string_view w){ return w == fn.name; });

  constexpr size_t N_s = 4096;   // sample keys for SAC/BIC/differential
  constexpr size_t klen = 16;    // key length in bytes

  auto keys = random_keys(N_s, klen, 0x1234ABCD5678ULL);
  const size_t out_bits = effective_bits(fn, keys);
  const size_t in_bits = klen * 8;

  // ── SAC ────────────────────────────────────────────────────────────
  {
    // mat[i][j] counts how often output bit j flipped when input bit i was
    // flipped; ideal probability per cell is 0.5.
    std::vector<std::vector<size_t>> mat(in_bits,
                                         std::vector<size_t>(out_bits, 0));
    for (const auto & base : keys)
      {
        const size_t h0 = fn.hash(base.data(), base.size());
        for (size_t i = 0; i < in_bits; ++i)
          {
            std::string mut = base;
            mut[i / 8] ^= static_cast<char>(1u << (i % 8));
            const size_t diff = h0 ^ fn.hash(mut.data(), mut.size());
            for (size_t j = 0; j < out_bits; ++j)
              mat[i][j] += (diff >> j) & 1u;
          }
      }
    double max_bias = 0, sum_bias = 0;
    for (size_t i = 0; i < in_bits; ++i)
      for (size_t j = 0; j < out_bits; ++j)
        {
          const double p = static_cast<double>(mat[i][j]) / N_s;
          const double bias = std::abs(p - 0.5);
          max_bias = std::max(max_bias, bias);
          sum_bias += bias;
        }
    rm.sac_max_bias = max_bias;
    rm.sac_avg_bias = sum_bias / static_cast<double>(in_bits * out_bits);
  }

  // ── BIC (sampled) ─────────────────────────────────────────────────
  {
    // Sample a few input bits and, for each, a few (j,k) output-bit pairs;
    // record the largest absolute Pearson correlation between flip events.
    std::mt19937_64 rng(0xB1C3);
    constexpr size_t sampled_i = 16;
    constexpr size_t pairs = 64;
    double max_corr = 0;
    for (size_t t = 0; t < sampled_i; ++t)
      {
        const size_t i = rng() % in_bits;
        std::vector<size_t> diffs(N_s);
        for (size_t s = 0; s < N_s; ++s)
          {
            const size_t h0 = fn.hash(keys[s].data(), keys[s].size());
            std::string mut = keys[s];
            mut[i / 8] ^= static_cast<char>(1u << (i % 8));
            diffs[s] = h0 ^ fn.hash(mut.data(), mut.size());
          }
        for (size_t p = 0; p < pairs; ++p)
          {
            const size_t j = rng() % out_bits;
            size_t k = rng() % (out_bits - 1);
            if (k >= j) ++k;   // ensure k != j, uniformly over the rest
            double sj = 0, sk = 0, sjk = 0, sj2 = 0, sk2 = 0;
            for (size_t s = 0; s < N_s; ++s)
              {
                const double bj = (diffs[s] >> j) & 1u;
                const double bk = (diffs[s] >> k) & 1u;
                sj += bj; sk += bk; sjk += bj*bk;
                sj2 += bj*bj; sk2 += bk*bk;
              }
            const double n = N_s;
            const double num = n*sjk - sj*sk;
            const double den = std::sqrt((n*sj2-sj*sj)*(n*sk2-sk*sk));
            // Skip degenerate (constant) columns to avoid 0/0.
            if (den > 1e-12) max_corr = std::max(max_corr, std::abs(num/den));
          }
      }
    rm.bic_max_corr = max_corr;
  }

  // ── Differential uniformity ────────────────────────────────────────
  {
    // Flip `target` random distinct bits per key (partial Fisher-Yates
    // selection) and compare the mean output Hamming distance to bits/2.
    std::mt19937_64 rng(0xD1FF3);
    const double ideal = static_cast<double>(out_bits) / 2.0;
    double worst = 0;
    for (size_t target : {1u, 4u, 16u})
      {
        double sum = 0;
        for (const auto & base : keys)
          {
            std::string mut = base;
            std::vector<size_t> bits(in_bits);
            std::iota(bits.begin(), bits.end(), 0);
            for (size_t f = 0; f < target and f < bits.size(); ++f)
              {
                std::uniform_int_distribution<size_t> dist(f, bits.size()-1);
                std::swap(bits[f], bits[dist(rng)]);
                const size_t b = bits[f];
                mut[b/8] ^= static_cast<char>(1u << (b%8));
              }
            const size_t h0 = fn.hash(base.data(), base.size());
            const size_t h1 = fn.hash(mut.data(), mut.size());
            sum += std::popcount(h0 ^ h1);
          }
        const double mean = sum / N_s;
        const double rel = std::abs(mean - ideal) / ideal;
        worst = std::max(worst, rel);
      }
    rm.diff_worst = worst;
  }

  // ── Multi-resolution chi ───────────────────────────────────────────
  {
    // Reduced chi-square of bucket occupancy at prime and power-of-two
    // table sizes; keep the worst (largest) value.
    constexpr size_t N_chi = 50000;
    auto chi_keys = random_keys(N_chi, klen, 0x4E58);
    const std::array<size_t, 4> buckets = {127, 1024, 4096, 8191};
    double max_chi = 0;
    for (const size_t b : buckets)
      {
        std::vector<size_t> cnt(b, 0);
        for (const auto & k : chi_keys)
          ++cnt[fn.hash(k.data(), k.size()) % b];
        const double exp = static_cast<double>(N_chi) / b;
        double chi = 0;
        for (const size_t c : cnt)
          { const double d = c - exp; chi += d*d/exp; }
        max_chi = std::max(max_chi, chi / (b - 1));
      }
    rm.multireschi = max_chi;
  }

  // ── Adversarial: counter keys ──────────────────────────────────────
  {
    // Sequential 64-bit integers hashed raw — typical auto-increment IDs.
    constexpr size_t N_adv = 65536;
    constexpr size_t B_adv = 1024;
    std::vector<size_t> cnt(B_adv, 0);
    for (size_t i = 0; i < N_adv; ++i)
      { std::uint64_t v = i; cnt[fn.hash(&v, sizeof(v)) % B_adv]++; }
    const double exp = static_cast<double>(N_adv) / B_adv;
    double chi = 0;
    for (const size_t c : cnt) { const double d = c - exp; chi += d*d/exp; }
    rm.adv_chi_ctr = chi / (B_adv - 1);
  }

  // ── Adversarial: long-prefix keys ─────────────────────────────────
  {
    // 128-byte shared prefix plus a 4-byte varying suffix; a hash dominated
    // by the prefix collapses into few buckets here.
    constexpr size_t N_adv = 65536;
    constexpr size_t B_adv = 1024;
    constexpr size_t pfxlen = 128;
    const std::string pfx(pfxlen, 'Z');
    std::vector<size_t> cnt(B_adv, 0);
    for (size_t i = 0; i < N_adv; ++i)
      {
        std::string k = pfx;
        std::uint32_t s = static_cast<std::uint32_t>(i);
        k.append(reinterpret_cast<const char *>(&s), sizeof(s));
        cnt[fn.hash(k.data(), k.size()) % B_adv]++;
      }
    const double exp = static_cast<double>(N_adv) / B_adv;
    double chi = 0;
    for (const size_t c : cnt) { const double d = c - exp; chi += d*d/exp; }
    rm.adv_chi_pfx = chi / (B_adv - 1);
  }

  // ── Alignment ─────────────────────────────────────────────────────
  {
    // Hash the same bytes at buffer offsets 1..pad-1; any mismatch against
    // the offset-0 reference marks the function alignment-sensitive.
    constexpr size_t test_len = 37;
    constexpr size_t pad = 16;
    std::vector<char> buf(test_len + pad);
    std::mt19937_64 rng2(0xA119ULL);
    for (auto & c : buf) c = static_cast<char>(rng2() & 0xff);
    const size_t ref = fn.hash(buf.data(), test_len);
    rm.align_ok = true;
    for (size_t off = 1; off < pad; ++off)
      {
        std::memmove(buf.data() + off, buf.data(), test_len);
        if (fn.hash(buf.data() + off, test_len) != ref)
          { rm.align_ok = false; break; }
        std::memmove(buf.data(), buf.data() + off, test_len);
      }
  }

  rm.score = ranking_score(rm);
  return rm;
}
413
414} // anonymous namespace
415
416// ======================================================================
417// Test fixture
418// ======================================================================
419
// Shared fixture for all validation tests. The only suite-wide setup is
// seeding jsw_hash's mixing table, which must happen before any jsw call.
class HashValidation : public ::testing::Test
{
protected:
  static void SetUpTestSuite()
  {
    // Fixed seed keeps jsw_hash results reproducible across runs.
    init_jsw(42);
  }
};
428
429// ======================================================================
430// 1. Strict Avalanche Criterion (SAC)
431//
432// For each input bit i, flip it and measure the probability that each
433// output bit j changes. A perfect hash has P(j flips | i flipped) = 0.5
434// for all (i,j).
435//
436// We compute the full matrix and check:
437// - The per-cell bias max |P - 0.5| over all (i,j)
438// - The per-row mean (should be ~0.5)
439// - The global mean (should be ~0.5)
440// ======================================================================
441
443{
444 constexpr size_t N_samples = 2048;
445 constexpr size_t key_len = 16;
446
447 auto keys = random_keys(N_samples, key_len, 0x5AC00ULL + 42);
448
449 const size_t input_bits = key_len * 8;
450
451 std::cout << "\n=== Strict Avalanche Criterion (SAC) ===\n"
452 << "Samples: " << N_samples
453 << " Key length: " << key_len << " bytes\n\n";
454
455 for (const auto & fn : strong_candidates())
456 {
457 SCOPED_TRACE(fn.name);
458
459 const size_t out_bits = effective_bits(fn, keys);
460
461 // sac_matrix[i][j] = number of times output bit j flipped when input
462 // bit i was flipped, over all N_samples keys.
463 std::vector<std::vector<size_t>> sac_matrix(
464 input_bits, std::vector<size_t>(out_bits, 0));
465
466 for (const auto & base : keys)
467 {
468 const size_t h0 = fn.hash(base.data(), base.size());
469 for (size_t i = 0; i < input_bits; ++i)
470 {
471 std::string mutated = base;
472 mutated[i / 8] ^= static_cast<char>(1u << (i % 8));
473 const size_t h1 = fn.hash(mutated.data(), mutated.size());
474 const size_t diff = h0 ^ h1;
475 for (size_t j = 0; j < out_bits; ++j)
476 sac_matrix[i][j] += (diff >> j) & 1u;
477 }
478 }
479
480 // Compute statistics.
481 double max_bias = 0.0;
482 double sum_bias = 0.0;
483 size_t cells = 0;
484 double worst_row_mean = 0.0;
485
486 for (size_t i = 0; i < input_bits; ++i)
487 {
488 double row_sum = 0.0;
489 for (size_t j = 0; j < out_bits; ++j)
490 {
491 const double p = static_cast<double>(sac_matrix[i][j])
492 / N_samples;
493 const double bias = std::abs(p - 0.5);
494 max_bias = std::max(max_bias, bias);
495 sum_bias += bias;
496 row_sum += p;
497 ++cells;
498 }
499 const double row_mean = row_sum / out_bits;
500 worst_row_mean = std::max(worst_row_mean, std::abs(row_mean - 0.5));
501 }
502
503 const double avg_bias = sum_bias / cells;
504
505 std::cout << std::left << std::setw(16) << fn.name
506 << " out_bits=" << std::setw(3) << out_bits
507 << " max_bias=" << std::fixed << std::setprecision(4) << max_bias
508 << " avg_bias=" << avg_bias
509 << " worst_row=" << worst_row_mean << "\n";
510
511 // Strong hashes: max per-cell bias should be < 0.10, average < 0.03.
512 EXPECT_LT(max_bias, 0.10)
513 << fn.name << " SAC max per-cell bias exceeds 0.10";
514 EXPECT_LT(avg_bias, 0.03)
515 << fn.name << " SAC average bias exceeds 0.03";
517 << fn.name << " SAC worst row mean deviation exceeds 0.05";
518 }
519}
520
521// ======================================================================
522// 2. Bit Independence Criterion (BIC)
523//
524// For a given input-bit flip, check that *pairs* of output bits flip
525// independently. If output bits j and k are independent under flip of
526// input bit i, then Corr(flip_j, flip_k) ≈ 0.
527//
528// We sample a subset of (i, j, k) triples to keep it tractable.
529// ======================================================================
530
532{
533 constexpr size_t N_samples = 4096;
534 constexpr size_t key_len = 16;
535 constexpr size_t max_pairs = 256; // random (j,k) pairs per input bit i
536
537 auto keys = random_keys(N_samples, key_len, 0xB1CULL);
538 const size_t input_bits = key_len * 8;
539
540 std::cout << "\n=== Bit Independence Criterion (BIC) ===\n";
541
542 for (const auto & fn : strong_candidates())
543 {
544 SCOPED_TRACE(fn.name);
545 const size_t out_bits = effective_bits(fn, keys);
546 if (out_bits < 4)
547 continue;
548
549 // For each input bit, collect the flip vector for all output bits.
550 // flip_vectors[i] = vector of size N_samples, each element is the
551 // diff bitmask for that sample when input bit i is flipped.
552 // We only sample a few input bits to keep it fast.
553 std::mt19937_64 rng(0xB1C2ULL);
554 constexpr size_t sampled_input_bits = 32;
555 std::vector<size_t> input_bit_indices(sampled_input_bits);
556 for (size_t idx = 0; idx < sampled_input_bits; ++idx)
558
559 double max_corr = 0.0;
560 double sum_corr = 0.0;
561 size_t n_triples = 0;
562
563 for (const size_t i : input_bit_indices)
564 {
565 // Collect flip vectors for all samples.
566 std::vector<size_t> diffs(N_samples);
567 for (size_t s = 0; s < N_samples; ++s)
568 {
569 const size_t h0 = fn.hash(keys[s].data(), keys[s].size());
570 std::string mutated = keys[s];
571 mutated[i / 8] ^= static_cast<char>(1u << (i % 8));
572 const size_t h1 = fn.hash(mutated.data(), mutated.size());
573 diffs[s] = h0 ^ h1;
574 }
575
576 // Sample random (j, k) output-bit pairs.
577 for (size_t p = 0; p < max_pairs; ++p)
578 {
579 const size_t j = rng() % out_bits;
580 size_t k = rng() % (out_bits - 1);
581 if (k >= j)
582 ++k;
583
584 // Compute Pearson correlation between bit j flips and bit k flips.
585 double sum_jk = 0, sum_j = 0, sum_k = 0;
586 double sum_j2 = 0, sum_k2 = 0;
587 for (size_t s = 0; s < N_samples; ++s)
588 {
589 const double bj = static_cast<double>((diffs[s] >> j) & 1u);
590 const double bk = static_cast<double>((diffs[s] >> k) & 1u);
591 sum_j += bj;
592 sum_k += bk;
593 sum_jk += bj * bk;
594 sum_j2 += bj * bj;
595 sum_k2 += bk * bk;
596 }
597 const double n = static_cast<double>(N_samples);
598 const double num = n * sum_jk - sum_j * sum_k;
599 const double den = std::sqrt((n * sum_j2 - sum_j * sum_j) *
600 (n * sum_k2 - sum_k * sum_k));
601 if (den > 1e-12)
602 {
603 const double corr = std::abs(num / den);
604 max_corr = std::max(max_corr, corr);
605 sum_corr += corr;
606 ++n_triples;
607 }
608 }
609 }
610
611 const double avg_corr = n_triples > 0 ? sum_corr / n_triples : 0.0;
612
613 std::cout << std::left << std::setw(16) << fn.name
614 << " max_corr=" << std::fixed << std::setprecision(4) << max_corr
615 << " avg_corr=" << avg_corr
616 << " triples=" << n_triples << "\n";
617
618 EXPECT_LT(max_corr, 0.15)
619 << fn.name << " BIC max pairwise correlation exceeds 0.15";
620 EXPECT_LT(avg_corr, 0.05)
621 << fn.name << " BIC average pairwise correlation exceeds 0.05";
622 }
623}
624
625// ======================================================================
626// 3. Differential Uniformity
627//
628// Hash pairs of inputs at various Hamming distances and verify that the
629// output Hamming distance distribution is centered around bits/2
630// regardless of the input distance.
631// ======================================================================
632
634{
635 constexpr size_t N_samples = 4096;
636 constexpr size_t key_len = 16;
637
638 auto keys = random_keys(N_samples, key_len, 0xD1FFULL);
639
640 std::cout << "\n=== Differential Uniformity ===\n";
641
642 const std::array<size_t, 5> input_distances = {1, 2, 4, 8, 16};
643
644 for (const auto & fn : strong_candidates())
645 {
646 SCOPED_TRACE(fn.name);
647 const size_t out_bits = effective_bits(fn, keys);
648 const double ideal_mean = static_cast<double>(out_bits) / 2.0;
649
650 std::cout << std::left << std::setw(16) << fn.name;
651
652 for (const size_t target_dist : input_distances)
653 {
654 // For each key, flip exactly `target_dist` random bits and measure
655 // the output Hamming distance.
656 std::mt19937_64 rng(0xD1FF2ULL + target_dist);
657 double sum_out_dist = 0.0;
658 double sum_sq = 0.0;
659 size_t count = 0;
660
661 for (const auto & base : keys)
662 {
663 const size_t h0 = fn.hash(base.data(), base.size());
664
665 std::string mutated = base;
666 // Flip target_dist distinct random bits.
667 std::vector<size_t> all_bits(key_len * 8);
668 std::iota(all_bits.begin(), all_bits.end(), 0);
669 for (size_t f = 0; f < target_dist and f < all_bits.size(); ++f)
670 {
671 std::uniform_int_distribution<size_t> dist(f, all_bits.size() - 1);
672 const size_t pick = dist(rng);
673 std::swap(all_bits[f], all_bits[pick]);
674 const size_t bit = all_bits[f];
675 mutated[bit / 8] ^= static_cast<char>(1u << (bit % 8));
676 }
677
678 const size_t h1 = fn.hash(mutated.data(), mutated.size());
679 const double d = static_cast<double>(std::popcount(h0 ^ h1));
680 sum_out_dist += d;
681 sum_sq += d * d;
682 ++count;
683 }
684
685 const double mean = sum_out_dist / count;
686 const double variance = (sum_sq / count) - (mean * mean);
687 const double stddev = std::sqrt(std::max(variance, 0.0));
688
689 std::cout << " d=" << target_dist
690 << " mean=" << std::fixed << std::setprecision(2) << mean
691 << " sd=" << stddev;
692
693 // Mean output distance should be near ideal_mean regardless of
694 // input distance. Allow ±15% of ideal.
696 << fn.name << " differential mean off at input dist " << target_dist;
697 }
698 std::cout << "\n";
699 }
700}
701
702// ======================================================================
703// 4. Multi-Resolution Distribution (chi-square at multiple bucket counts)
704//
705// Good hashes should distribute uniformly at any table size, not just
706// powers of 2. We test primes and powers of 2.
707// ======================================================================
708
710{
711 constexpr size_t N_keys = 100000;
712 auto keys = random_keys(N_keys, 16, 0x4E57ULL);
713
714 const std::array<size_t, 8> bucket_counts =
715 {127, 256, 509, 1024, 2039, 4096, 8191, 16384};
716
717 std::cout << "\n=== Multi-Resolution Distribution ===\n"
718 << std::left << std::setw(16) << "hash";
719 for (const size_t b : bucket_counts)
720 std::cout << std::right << std::setw(8) << b;
721 std::cout << "\n";
722
723 for (const auto & fn : all_tested())
724 {
725 SCOPED_TRACE(fn.name);
726 std::cout << std::left << std::setw(16) << fn.name;
727
728 for (const size_t buckets : bucket_counts)
729 {
730 std::vector<size_t> counts(buckets, 0);
731 for (const auto & k : keys)
732 ++counts[fn.hash(k.data(), k.size()) % buckets];
733
734 const double expected = static_cast<double>(N_keys) / buckets;
735 double chi = 0.0;
736 for (const size_t c : counts)
737 {
738 const double diff = static_cast<double>(c) - expected;
739 chi += (diff * diff) / expected;
740 }
741 const double reduced = chi / static_cast<double>(buckets - 1);
742
743 std::cout << std::right << std::fixed << std::setprecision(3)
744 << std::setw(8) << reduced;
745
746 if (fn.is_strong)
747 {
748 EXPECT_LT(reduced, 2.50)
749 << fn.name << " chi-square too high at " << buckets << " buckets";
750 }
751 }
752 std::cout << "\n";
753 }
754}
755
756// ======================================================================
757// 5. Adversarial Pattern Tests
758//
759// Inputs designed to stress-test hash quality:
760// a) All-zero keys of varying lengths
761// b) Single-bit keys (only 1 bit set in the whole key)
762// c) Counter keys with small deltas
763// d) Keys differing only in high-order bytes
764// e) Cyclic/repeating patterns
765// f) Long common prefix with short varying suffix
766// ======================================================================
767
769{
770 std::cout << "\n=== Adversarial: all-zero keys of varying lengths ===\n";
771
772 for (const auto & fn : strong_candidates())
773 {
774 SCOPED_TRACE(fn.name);
775
776 // Hash zero-filled keys from length 1..256; they must all be distinct.
777 std::unordered_set<size_t> hashes;
778 for (size_t len = 1; len <= 256; ++len)
779 {
780 const std::string key(len, '\0');
781 hashes.insert(fn.hash(key.data(), key.size()));
782 }
783
784 const double uniqueness = static_cast<double>(hashes.size()) / 256.0;
785 std::cout << std::left << std::setw(16) << fn.name
786 << " uniqueness=" << std::fixed << std::setprecision(4)
787 << uniqueness << " (" << hashes.size() << "/256)\n";
788
789 EXPECT_GE(uniqueness, 0.98)
790 << fn.name << " too many collisions on zero-filled length-varying keys";
791 }
792}
793
795{
796 constexpr size_t key_len = 32;
797 const size_t total_bits = key_len * 8;
798
799 std::cout << "\n=== Adversarial: single-bit-set keys ===\n";
800
801 for (const auto & fn : strong_candidates())
802 {
803 SCOPED_TRACE(fn.name);
804
805 std::unordered_set<size_t> hashes;
806 for (size_t bit = 0; bit < total_bits; ++bit)
807 {
808 std::string key(key_len, '\0');
809 key[bit / 8] = static_cast<char>(1u << (bit % 8));
810 hashes.insert(fn.hash(key.data(), key.size()));
811 }
812
813 const double uniqueness =
814 static_cast<double>(hashes.size()) / total_bits;
815 std::cout << std::left << std::setw(16) << fn.name
816 << " uniqueness=" << std::fixed << std::setprecision(4)
817 << uniqueness << " (" << hashes.size() << "/" << total_bits << ")\n";
818
819 EXPECT_GE(uniqueness, 0.99)
820 << fn.name << " collisions on single-bit-set keys";
821 }
822}
823
825{
826 // Sequential 64-bit counter packed into 8 bytes: typical worst case for
827 // hash tables indexed by auto-increment IDs.
828 constexpr size_t N = 100000;
829 constexpr size_t buckets = 1024;
830
831 std::cout << "\n=== Adversarial: counter keys ===\n";
832
833 for (const auto & fn : all_tested())
834 {
835 SCOPED_TRACE(fn.name);
836
837 std::vector<size_t> counts(buckets, 0);
838 for (size_t i = 0; i < N; ++i)
839 {
840 std::uint64_t val = static_cast<std::uint64_t>(i);
841 counts[fn.hash(&val, sizeof(val)) % buckets]++;
842 }
843
844 const double expected = static_cast<double>(N) / buckets;
845 double chi = 0.0;
846 for (const size_t c : counts)
847 {
848 const double diff = static_cast<double>(c) - expected;
849 chi += (diff * diff) / expected;
850 }
851 const double reduced = chi / (buckets - 1);
852
853 std::cout << std::left << std::setw(16) << fn.name
854 << " chi=" << std::fixed << std::setprecision(3) << reduced
855 << "\n";
856
857 if (fn.is_strong)
858 EXPECT_LT(reduced, 3.0)
859 << fn.name << " poor distribution on sequential counter keys";
860 }
861}
862
864{
865 // Keys that are identical except in the last byte.
866 constexpr size_t key_len = 32;
867 constexpr size_t N = 256;
868
869 std::cout << "\n=== Adversarial: differ only in high byte ===\n";
870
871 for (const auto & fn : strong_candidates())
872 {
873 SCOPED_TRACE(fn.name);
874
875 std::unordered_set<size_t> hashes;
876 for (size_t v = 0; v < N; ++v)
877 {
878 std::string key(key_len, 'X');
879 key[key_len - 1] = static_cast<char>(v);
880 hashes.insert(fn.hash(key.data(), key.size()));
881 }
882
883 EXPECT_EQ(hashes.size(), N)
884 << fn.name << " collision on keys differing only in last byte";
885 }
886}
887
889{
890 // Repeating pattern "ABCD..." of varying length; tests whether the hash
891 // can distinguish different-length repetitions of the same cycle.
892 constexpr size_t max_len = 512;
893 const std::string cycle = "ABCDEFGH";
894
895 std::cout << "\n=== Adversarial: cyclic pattern keys ===\n";
896
897 for (const auto & fn : strong_candidates())
898 {
899 SCOPED_TRACE(fn.name);
900
901 std::unordered_set<size_t> hashes;
902 for (size_t len = 1; len <= max_len; ++len)
903 {
904 std::string key;
905 key.reserve(len);
906 for (size_t i = 0; i < len; ++i)
907 key.push_back(cycle[i % cycle.size()]);
908 hashes.insert(fn.hash(key.data(), key.size()));
909 }
910
911 const double uniqueness =
912 static_cast<double>(hashes.size()) / max_len;
913 std::cout << std::left << std::setw(16) << fn.name
914 << " uniqueness=" << std::fixed << std::setprecision(4)
915 << uniqueness << "\n";
916
917 EXPECT_GE(uniqueness, 0.99)
918 << fn.name << " collisions on cyclic-pattern keys";
919 }
920}
921
923{
924 // 128-byte common prefix, 4-byte varying suffix. Tests that the hash is
925 // not dominated by the prefix.
926 constexpr size_t prefix_len = 128;
927 constexpr size_t N = 65536;
928 constexpr size_t buckets = 1024;
929
930 std::cout << "\n=== Adversarial: long common prefix ===\n";
931
932 const std::string prefix(prefix_len, 'Z');
933
934 for (const auto & fn : all_tested())
935 {
936 SCOPED_TRACE(fn.name);
937
938 std::vector<size_t> counts(buckets, 0);
939 for (size_t i = 0; i < N; ++i)
940 {
941 std::string key = prefix;
942 std::uint32_t suffix = static_cast<std::uint32_t>(i);
943 key.append(reinterpret_cast<const char *>(&suffix), sizeof(suffix));
944 counts[fn.hash(key.data(), key.size()) % buckets]++;
945 }
946
947 const double expected = static_cast<double>(N) / buckets;
948 double chi = 0.0;
949 for (const size_t c : counts)
950 {
951 const double diff = static_cast<double>(c) - expected;
952 chi += (diff * diff) / expected;
953 }
954 const double reduced = chi / (buckets - 1);
955
956 std::cout << std::left << std::setw(16) << fn.name
957 << " chi=" << std::fixed << std::setprecision(3) << reduced
958 << "\n";
959
960 if (fn.is_strong)
961 EXPECT_LT(reduced, 3.0)
962 << fn.name
963 << " poor distribution on long-prefix adversarial keys";
964 }
965}
966
967// ======================================================================
968// 6. Alignment Sensitivity
969//
970// Hash the same data at different memory alignments. A correct hash
971// must produce identical output regardless of alignment.
972// ======================================================================
973
975{
976 constexpr size_t key_len = 37; // intentionally not a power of 2
977 // Allocate a buffer with extra room so we can shift the start by 0..15.
978 constexpr size_t pad = 16;
979 std::vector<char> buf(key_len + pad);
980
981 // Fill with a known pattern.
982 std::mt19937_64 rng(0xA119EULL);
983 for (auto & c : buf)
984 c = static_cast<char>(rng() & 0xff);
985
986 std::cout << "\n=== Alignment Sensitivity ===\n";
987
988 for (const auto & fn : all_tested())
989 {
990 SCOPED_TRACE(fn.name);
991
992 const size_t ref_hash = fn.hash(buf.data(), key_len);
993 bool all_match = true;
994
995 for (size_t offset = 1; offset < pad; ++offset)
996 {
997 // Copy the key data to a different alignment.
998 std::memmove(buf.data() + offset, buf.data(), key_len);
999 const size_t h = fn.hash(buf.data() + offset, key_len);
1000 // Restore original position.
1001 std::memmove(buf.data(), buf.data() + offset, key_len);
1002
1003 if (h != ref_hash)
1004 {
1005 all_match = false;
1006 break;
1007 }
1008 }
1009
1010 std::cout << std::left << std::setw(16) << fn.name
1011 << " alignment-stable: " << (all_match ? "yes" : "NO")
1012 << "\n";
1013
1015 << fn.name << " produces different hashes for different alignments";
1016 }
1017}
1018
1019// ======================================================================
1020// 7. Seed Independence
1021//
1022// Different seeds should produce statistically independent output.
1023// We measure the cross-seed correlation for the same set of keys.
1024// ======================================================================
1025
1027{
1028 constexpr size_t N = 8192;
1029 constexpr size_t key_len = 16;
1030
1031 auto keys = random_keys(N, key_len, 0x5EED1ULL);
1032
1033 std::cout << "\n=== Seed Independence ===\n";
1034
1035 // Only test seeded functions.
1036 struct SeededFn
1037 {
1038 const char * name;
1039 size_t (* hash)(const void *, size_t, std::uint64_t);
1040 };
1041
1042 const std::array<SeededFn, 3> seeded_fns = {{
1043 {"xxhash64_hash", [](const void * p, size_t l, std::uint64_t s) -> size_t
1044 { return xxhash64_hash(p, l, s); }},
1045 {"wyhash_hash", [](const void * p, size_t l, std::uint64_t s) -> size_t
1046 { return wyhash_hash(p, l, s); }},
1047 {"siphash24_hash", [](const void * p, size_t l, std::uint64_t s) -> size_t
1048 { return siphash24_hash(p, l, s, s ^ 0xdeadbeefULL); }},
1049 }};
1050
1051 const std::array<std::uint64_t, 4> seeds = {0, 1, 42, 0xffffffffffffffff};
1052
1053 for (const auto & fn : seeded_fns)
1054 {
1055 SCOPED_TRACE(fn.name);
1056
1057 // Compute hash vectors for each seed.
1058 std::vector<std::vector<size_t>> hash_vecs(seeds.size(),
1059 std::vector<size_t>(N));
1060 for (size_t si = 0; si < seeds.size(); ++si)
1061 for (size_t ki = 0; ki < N; ++ki)
1062 hash_vecs[si][ki] = fn.hash(keys[ki].data(), keys[ki].size(),
1063 seeds[si]);
1064
1065 // Compute Pearson correlation between all seed pairs (on the low 32 bits
1066 // cast to double to avoid saturation issues).
1067 double max_corr = 0.0;
1068 for (size_t a = 0; a < seeds.size(); ++a)
1069 for (size_t b = a + 1; b < seeds.size(); ++b)
1070 {
1071 double sum_a = 0, sum_b = 0, sum_ab = 0;
1072 double sum_a2 = 0, sum_b2 = 0;
1073 for (size_t k = 0; k < N; ++k)
1074 {
1075 const double va = static_cast<double>(
1076 hash_vecs[a][k] & 0xFFFFFFFFu);
1077 const double vb = static_cast<double>(
1078 hash_vecs[b][k] & 0xFFFFFFFFu);
1079 sum_a += va;
1080 sum_b += vb;
1081 sum_ab += va * vb;
1082 sum_a2 += va * va;
1083 sum_b2 += vb * vb;
1084 }
1085 const double n = static_cast<double>(N);
1086 const double num = n * sum_ab - sum_a * sum_b;
1087 const double den = std::sqrt((n * sum_a2 - sum_a * sum_a) *
1088 (n * sum_b2 - sum_b * sum_b));
1089 if (den > 1e-12)
1090 max_corr = std::max(max_corr, std::abs(num / den));
1091 }
1092
1093 std::cout << std::left << std::setw(16) << fn.name
1094 << " max_cross_seed_corr=" << std::fixed << std::setprecision(5)
1095 << max_corr << "\n";
1096
1097 EXPECT_LT(max_corr, 0.05)
1098 << fn.name << " seeds are correlated (|r| > 0.05)";
1099 }
1100}
1101
1102// ======================================================================
1103// 8. Sparse Key Test
1104//
1105// Keys with very few bits set, similar to SMHasher's "Sparse" test.
1106// Generate all keys with exactly K bits set (K = 1, 2, 3) in a
1107// fixed-size buffer and check for collisions.
1108// ======================================================================
1109
1111{
1112 constexpr size_t key_len = 8; // 64 bits total
1113 const size_t total_bits = key_len * 8;
1114
1115 std::cout << "\n=== Sparse Key Collisions ===\n";
1116
1117 for (const auto & fn : strong_candidates())
1118 {
1119 SCOPED_TRACE(fn.name);
1120
1121 // K = 1: all single-bit keys (64 keys)
1122 // K = 2: all two-bit keys (64 choose 2 = 2016 keys)
1123 // K = 3: all three-bit keys (64 choose 3 = 41664 keys)
1124
1125 for (size_t K : {1, 2, 3})
1126 {
1127 std::unordered_set<size_t> hashes;
1128 size_t key_count = 0;
1129
1130 if (K == 1)
1131 {
1132 for (size_t b0 = 0; b0 < total_bits; ++b0)
1133 {
1134 std::string key(key_len, '\0');
1135 key[b0 / 8] |= static_cast<char>(1u << (b0 % 8));
1136 hashes.insert(fn.hash(key.data(), key.size()));
1137 ++key_count;
1138 }
1139 }
1140 else if (K == 2)
1141 {
1142 for (size_t b0 = 0; b0 < total_bits; ++b0)
1143 for (size_t b1 = b0 + 1; b1 < total_bits; ++b1)
1144 {
1145 std::string key(key_len, '\0');
1146 key[b0 / 8] |= static_cast<char>(1u << (b0 % 8));
1147 key[b1 / 8] |= static_cast<char>(1u << (b1 % 8));
1148 hashes.insert(fn.hash(key.data(), key.size()));
1149 ++key_count;
1150 }
1151 }
1152 else // K == 3
1153 {
1154 for (size_t b0 = 0; b0 < total_bits; ++b0)
1155 for (size_t b1 = b0 + 1; b1 < total_bits; ++b1)
1156 for (size_t b2 = b1 + 1; b2 < total_bits; ++b2)
1157 {
1158 std::string key(key_len, '\0');
1159 key[b0 / 8] |= static_cast<char>(1u << (b0 % 8));
1160 key[b1 / 8] |= static_cast<char>(1u << (b1 % 8));
1161 key[b2 / 8] |= static_cast<char>(1u << (b2 % 8));
1162 hashes.insert(fn.hash(key.data(), key.size()));
1163 ++key_count;
1164 }
1165 }
1166
1167 const double uniqueness =
1168 static_cast<double>(hashes.size()) / key_count;
1169
1170 std::cout << std::left << std::setw(16) << fn.name
1171 << " K=" << K
1172 << " keys=" << std::setw(6) << key_count
1173 << " uniq=" << std::fixed << std::setprecision(6)
1174 << uniqueness << "\n";
1175
1176 // With 64-bit output and ≤ 41664 keys, birthday paradox says
1177 // collision probability is negligible (< 10^-10), so we expect 100%.
1178 if constexpr (sizeof(size_t) >= 8)
1179 EXPECT_GE(uniqueness, 0.9999)
1180 << fn.name << " sparse key collision at K=" << K;
1181 }
1182 }
1183}
1184
1185// ======================================================================
1186// 9. Permutation Test
1187//
1188// Hash all permutations of a short input and verify uniform distribution.
1189// For a 4-byte input, there are 4! = 24 permutations of its bytes.
1190// For a 5-byte input, 5! = 120 permutations.
1191// ======================================================================
1192
1194{
1195 std::cout << "\n=== Permutation Distribution ===\n";
1196
1197 // Use a 6-byte key → 720 permutations.
1198 std::string base = "ABCDEF";
1199 std::sort(base.begin(), base.end());
1200 constexpr size_t buckets = 64;
1201
1202 for (const auto & fn : strong_candidates())
1203 {
1204 SCOPED_TRACE(fn.name);
1205
1206 std::vector<size_t> counts(buckets, 0);
1207 size_t n_perms = 0;
1208 std::string perm = base;
1209
1210 do
1211 {
1212 counts[fn.hash(perm.data(), perm.size()) % buckets]++;
1213 ++n_perms;
1214 }
1215 while (std::next_permutation(perm.begin(), perm.end()));
1216
1217 const double expected = static_cast<double>(n_perms) / buckets;
1218 double chi = 0.0;
1219 for (const size_t c : counts)
1220 {
1221 const double diff = static_cast<double>(c) - expected;
1222 chi += (diff * diff) / expected;
1223 }
1224 const double reduced = chi / (buckets - 1);
1225
1226 // All permutations must produce distinct hashes.
1227 std::unordered_set<size_t> uniq;
1228 perm = base;
1229 do
1230 { uniq.insert(fn.hash(perm.data(), perm.size())); }
1231 while (std::next_permutation(perm.begin(), perm.end()));
1232
1233 std::cout << std::left << std::setw(16) << fn.name
1234 << " perms=" << n_perms
1235 << " chi=" << std::fixed << std::setprecision(3) << reduced
1236 << " uniq=" << uniq.size() << "/" << n_perms << "\n";
1237
1238 EXPECT_EQ(uniq.size(), n_perms)
1239 << fn.name << " collision among byte permutations";
1240 }
1241}
1242
1243// ======================================================================
1244// 10. Window Test
1245//
1246// Slide a small "window" of set bits through a zero-filled key and
1247// check that every position produces a different hash. This catches
1248// hashes that ignore certain byte positions.
1249// ======================================================================
1250
1252{
1253 constexpr size_t key_len = 32;
1254 constexpr size_t window_size = 4; // 4 bytes of 0xFF sliding through
1255
1256 std::cout << "\n=== Sliding Window Test ===\n";
1257
1258 for (const auto & fn : strong_candidates())
1259 {
1260 SCOPED_TRACE(fn.name);
1261
1262 const size_t positions = key_len - window_size + 1;
1263 std::unordered_set<size_t> hashes;
1264
1265 for (size_t pos = 0; pos < positions; ++pos)
1266 {
1267 std::string key(key_len, '\0');
1268 for (size_t w = 0; w < window_size; ++w)
1269 key[pos + w] = '\xff';
1270 hashes.insert(fn.hash(key.data(), key.size()));
1271 }
1272
1273 const double uniqueness =
1274 static_cast<double>(hashes.size()) / positions;
1275
1276 std::cout << std::left << std::setw(16) << fn.name
1277 << " positions=" << positions
1278 << " uniq=" << hashes.size()
1279 << " uniqueness=" << std::fixed << std::setprecision(4)
1280 << uniqueness << "\n";
1281
1282 EXPECT_EQ(hashes.size(), positions)
1283 << fn.name << " collision in sliding window test";
1284 }
1285}
1286
1287// ======================================================================
1288// 11. Length-Extension Sensitivity
1289//
1290// Verify that hash("ABC") != hash("ABC\0") != hash("ABC\0\0").
1291// Some hash functions that don't fully incorporate the length can be
1292// fooled by trailing zeros.
1293// ======================================================================
1294
1296{
1297 std::cout << "\n=== Length Extension Sensitivity ===\n";
1298
1299 const std::string base = "HashTestPayload";
1300
1301 // Weak functions (add_hash, xor_hash, elf_hash, …) are known to be
1302 // insensitive to trailing NUL bytes by design — the assertion is only
1303 // applied to strong and general-profile functions.
1304 for (const auto & fn : all_tested())
1305 {
1306 SCOPED_TRACE(fn.name);
1307
1308 std::unordered_set<size_t> hashes;
1309 for (size_t extra = 0; extra < 16; ++extra)
1310 {
1311 std::string key = base + std::string(extra, '\0');
1312 hashes.insert(fn.hash(key.data(), key.size()));
1313 }
1314
1315 const bool is_weak =
1316 (std::string_view(fn.name) == "add_hash" or
1317 std::string_view(fn.name) == "xor_hash" or
1318 std::string_view(fn.name) == "rot_hash" or
1319 std::string_view(fn.name) == "sax_hash" or
1320 std::string_view(fn.name) == "jsw_hash" or
1321 std::string_view(fn.name) == "elf_hash" or
1322 std::string_view(fn.name) == "jen_hash");
1323
1324 std::cout << std::left << std::setw(16) << fn.name
1325 << " distinct=" << hashes.size() << "/16"
1326 << (is_weak ? " [weak — not asserted]" : "")
1327 << "\n";
1328
1329 if (not is_weak)
1330 EXPECT_EQ(hashes.size(), 16u)
1331 << fn.name << " fails length-extension sensitivity";
1332 }
1333}
1334
1335// ======================================================================
1336// Ranking: composite quality score across all hash functions
1337//
1338// Metrics collected per function:
1339// SAC max/avg bias, BIC max correlation, differential worst deviation,
1340// multi-resolution chi-square, adversarial chi (counter + long prefix),
1341// alignment stability.
1342//
1343// Lower score is better. Strong-profile functions are expected to rank
1344// above the general-profile ones.
1345// ======================================================================
1346
1348{
1349 std::vector<RankMetrics> results;
1350 for (const auto & fn : all_tested())
1351 results.push_back(compute_rank_metrics(fn));
1352
1353 // Sort by composite score (ascending = better).
1354 std::sort(results.begin(), results.end(),
1355 [] (const RankMetrics & a, const RankMetrics & b)
1356 { return a.score < b.score; });
1357
1358 // ── Header ─────────────────────────────────────────────────────────
1359 std::cout
1360 << "\n"
1361 << "╔══════════════════════════════════════════════════════════════════════════════╗\n"
1362 << "║ [HASH-007] Hash Function Quality Ranking ║\n"
1363 << "╠══════════════════════════════════════════════════════════════════════════════╣\n"
1364 << "║ Rank Name SAC-max SAC-avg BIC-max Diff-wst MR-chi Adv-ctr ║\n"
1365 << "║ (max) Adv-pfx ║\n"
1366 << "║ Align ║\n"
1367 << "║ Score ║\n"
1368 << "╠══════════════════════════════════════════════════════════════════════════════╣\n";
1369
1370 for (size_t i = 0; i < results.size(); ++i)
1371 {
1372 const auto & m = results[i];
1373 const char * tag = m.is_strong ? "[Strong]"
1374 : m.is_weak ? "[Weak] "
1375 : "[Genrl] ";
1376 std::cout
1377 << "║ " << std::setw(2) << i + 1
1378 << ". " << std::left << std::setw(16) << m.name
1379 << " " << tag
1380 << " " << std::right << std::fixed << std::setprecision(4)
1381 << std::setw(7) << m.sac_max_bias
1382 << " " << std::setw(7) << m.sac_avg_bias
1383 << " " << std::setw(7) << m.bic_max_corr
1384 << " " << std::setw(8) << m.diff_worst
1385 << " " << std::setw(7) << m.multireschi
1386 << " " << std::setw(7) << m.adv_chi_ctr
1387 << " ║\n"
1388 << "║ " << std::string(22, ' ')
1389 << " "
1390 << " "
1391 << std::setw(7) << m.adv_chi_pfx
1392 << " ║\n"
1393 << "║ " << std::string(22, ' ')
1394 << " "
1395 << " "
1396 << (m.align_ok ? " align✓" : " alignX")
1397 << " ║\n"
1398 << "║ " << std::string(22, ' ')
1399 << " "
1400 << " Score: "
1401 << std::setw(7) << m.score
1402 << " ║\n"
1403 << "╠══════════════════════════════════════════════════════════════════════════════╣\n";
1404 }
1405
1406 std::cout
1407 << "╚══════════════════════════════════════════════════════════════════════════════╝\n"
1408 << "\nLower score = better quality. Strong-profile threshold: score < 1.0\n"
1409 << "Columns: SAC-max/avg = Strict Avalanche Criterion bias\n"
1410 << " BIC-max = Bit Independence max correlation\n"
1411 << " Diff-wst = worst differential mean deviation (relative)\n"
1412 << " MR-chi = max reduced chi-square across 4 table sizes\n"
1413 << " Adv-ctr/pfx = adversarial pattern chi-square\n"
1414 << "\n========================================\n"
1415 << "[HASH-007] Advanced validation complete.\n"
1416 << "========================================\n";
1417
1418 // Strong functions must rank above true general-profile ones (weak excluded).
1419 // (There must exist at least one strong fn with better score than any general fn.)
1420 double best_strong = std::numeric_limits<double>::max();
1421 double best_general = std::numeric_limits<double>::max();
1422 for (const auto & m : results)
1423 {
1424 if (m.is_strong)
1425 best_strong = std::min(best_strong, m.score);
1426 else if (not m.is_weak)
1427 best_general = std::min(best_general, m.score);
1428 }
1430 << "Best strong-profile function should outrank best general-profile function";
1431}
long double h
Definition btreepic.C:154
long double w
Definition btreepic.C:153
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:139
static void SetUpTestSuite()
static mt19937 rng
#define N
Definition fib.C:294
__gmp_expr< T, __gmp_unary_expr< __gmp_expr< T, U >, __gmp_exp_function > > exp(const __gmp_expr< T, U > &expr)
Definition gmpfrxx.h:4066
size_t sax_hash(const void *key, const size_t len) noexcept
Shift-Add-XOR hash (SAX hash)
Definition hash-fct.H:183
size_t SuperFastHash(const void *key, size_t len, std::uint32_t seed=0) noexcept
Paul Hsieh super fast hash function.
Definition hash-fct.H:408
size_t add_hash(const void *key, const size_t len) noexcept
Additive hash.
Definition hash-fct.H:101
size_t jsw_hash(const void *key, size_t len) noexcept
JSW hash (Julienne Walker)
Definition hash-fct.C:247
size_t xxhash64_hash(const void *key, size_t len, std::uint64_t seed) noexcept
xxHash64 from the xxHash family.
Definition hash-fct.C:611
size_t djb_hash(const void *key, const size_t len) noexcept
Bernstein's hash (DJB hash)
Definition hash-fct.H:163
size_t jen_hash(const void *key, size_t length, unsigned initval) noexcept
Jenkins hash (lookup3)
Definition hash-fct.C:325
size_t xor_hash(const void *key, const size_t len) noexcept
XOR hash.
Definition hash-fct.H:122
size_t fnv_hash(const void *key, const size_t len) noexcept
FNV-1a hash.
Definition hash-fct.H:203
size_t siphash24_hash(const void *key, size_t len, std::uint64_t key0, std::uint64_t key1) noexcept
SipHash-2-4 keyed hash.
Definition hash-fct.C:766
void init_jsw() noexcept
Initializes the randomized lookup table used by jsw_hash().
Definition hash-fct.C:219
size_t rot_hash(const void *key, const size_t len) noexcept
Rotating hash.
Definition hash-fct.H:143
size_t wyhash_hash(const void *key, size_t len, std::uint64_t seed) noexcept
wyhash non-cryptographic hash.
Definition hash-fct.C:694
size_t elf_hash(const void *key, const size_t len) noexcept
ELF hash.
Definition hash-fct.H:279
size_t oat_hash(const void *key, const size_t len) noexcept
One-at-a-Time hash (OAT hash)
Definition hash-fct.H:223
TEST_F(HashValidation, StrictAvalancheCriterion)
const long double offset[]
Offset values indexed by symbol string length (bounded by MAX_OFFSET_INDEX)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
and
Check uniqueness with explicit hash + equality functors.
size_t size(Node *root) noexcept
auto variance(const Container &data, bool population=false) -> std::decay_t< decltype(*std::begin(data))>
Compute variance using Welford's numerically stable algorithm.
Definition stat_utils.H:215
static void suffix(Node *root, DynList< Node * > &acc)
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
static void prefix(Node *root, DynList< Node * > &acc)
auto stddev(const Container &data, bool population=false) -> std::decay_t< decltype(*std::begin(data))>
Compute standard deviation.
Definition stat_utils.H:257
auto mean(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute the arithmetic mean.
Definition stat_utils.H:183
bool diff(const C1 &c1, const C2 &c2, Eq e=Eq())
Check if two containers differ.
const unsigned Default_Hash_Seed
Definition hash-fct.C:45
size_t murmur3hash(const Key &key, std::uint32_t seed)
Definition hash-fct.H:334
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
T sum(const Container &container, const T &init=T{})
Compute sum of all elements.
FooMap m(5, fst_unit_pair_hash, snd_unit_pair_hash)
int keys[]
ValueArg< size_t > seed
Definition testHash.C:48
static int * k
Dynamic array container with automatic resizing.
DynList< int > l