Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
set_structures_benchmark.cc
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
172#include <iostream>
173#include <iomanip>
174#include <chrono>
175#include <vector>
176#include <algorithm>
177#include <random>
178#include <string>
179#include <functional>
180#include <cmath>
181#include <set>
182#include <unordered_set>
183
184// Tree-based sets
185#include <tpl_dynSetTree.H>
186
187// Skip list
188#include <tpl_dynSkipList.H>
189
190// Hash-based sets
191#include <tpl_dynSetHash.H>
192#include <tpl_odhash.H>
193#include <tpl_olhash.H>
194
195using namespace std;
196using namespace Aleph;
197
198// =============================================================================
199// Type Aliases for All Set Structures
200// =============================================================================
201
202// Tree-based sets (O(log n) worst case)
208
209// Ranked tree variants (with select/position operations)
212
213// Skip list (expected O(log n))
215
216// Hash-based sets (expected O(1))
217using LhashSet = DynSetLhash<int>; // Separate chaining
218using LinHashSet = DynSetLinHash<int>; // Linear probing (dynamic)
219using ODHashSet = SetODhash<int>; // Double hashing (open addr)
220using OLHashSet = SetOLhash<int>; // Linear probing (open addr)
221
222// =============================================================================
223// Benchmark Infrastructure
224// =============================================================================
225
227{
228 string name;
229 string category;
230 double insert_ms;
231 double search_ms;
232 double remove_ms;
233 double total_ms;
234 size_t memory_hint; // Rough estimate
235};
236
237// Timing utility
238template <typename Func>
239double measure_ms(Func&& f)
240{
241 auto start = chrono::high_resolution_clock::now();
242 f();
243 auto end = chrono::high_resolution_clock::now();
244 return chrono::duration<double, milli>(end - start).count();
245}
246
247// Generic benchmark for tree-based sets
248template <typename Set>
249BenchmarkResult benchmark_tree_set(const string& name, const string& category,
250 const vector<int>& data)
251{
252 BenchmarkResult result;
253 result.name = name;
254 result.category = category;
255
256 Set set;
257
258 // Insert
259 result.insert_ms = measure_ms([&]() {
260 for (int x : data)
261 (void)set.insert(x);
262 });
263
264 // Search all
265 result.search_ms = measure_ms([&]() {
266 for (int x : data)
267 {
268 auto* p = set.search(x);
269 if (p == nullptr)
270 cerr << "ERROR: " << x << " not found in " << name << endl;
271 }
272 });
273
274 // Remove all
275 result.remove_ms = measure_ms([&]() {
276 for (int x : data)
277 set.remove(x);
278 });
279
280 result.total_ms = result.insert_ms + result.search_ms + result.remove_ms;
281 result.memory_hint = data.size() * (sizeof(int) + 3 * sizeof(void*)); // Estimate
282
283 return result;
284}
285
286// Generic benchmark for hash-based sets with proper interface
287template <typename Set>
288BenchmarkResult benchmark_hash_set(const string& name, const string& category,
289 const vector<int>& data)
290{
291 BenchmarkResult result;
292 result.name = name;
293 result.category = category;
294
295 Set set;
296
297 // Insert
298 result.insert_ms = measure_ms([&]() {
299 for (int x : data)
300 (void)set.insert(x);
301 });
302
303 // Search all using search() which returns pointer
304 result.search_ms = measure_ms([&]() {
305 for (int x : data)
306 {
307 auto* p = set.search(x);
308 if (p == nullptr)
309 cerr << "ERROR: " << x << " not found in " << name << endl;
310 }
311 });
312
313 // Remove all
314 result.remove_ms = measure_ms([&]() {
315 for (int x : data)
316 set.remove(x);
317 });
318
319 result.total_ms = result.insert_ms + result.search_ms + result.remove_ms;
320 result.memory_hint = data.size() * sizeof(int) * 2; // Rough estimate
321
322 return result;
323}
324
325// Benchmark for skip list
326BenchmarkResult benchmark_skip_list(const string& name, const string& category,
327 const vector<int>& data)
328{
329 BenchmarkResult result;
330 result.name = name;
331 result.category = category;
332
333 SkipSet set;
334
335 // Insert
336 result.insert_ms = measure_ms([&]() {
337 for (int x : data)
338 (void)set.insert(x);
339 });
340
341 // Search all
342 result.search_ms = measure_ms([&]() {
343 for (int x : data)
344 {
345 auto* p = set.search(x);
346 if (p == nullptr)
347 cerr << "ERROR: " << x << " not found in " << name << endl;
348 }
349 });
350
351 // Remove all
352 result.remove_ms = measure_ms([&]() {
353 for (int x : data)
354 set.remove(x);
355 });
356
357 result.total_ms = result.insert_ms + result.search_ms + result.remove_ms;
358 result.memory_hint = data.size() * sizeof(int) * 4; // Skip list has ~2 pointers per level avg
359
360 return result;
361}
362
363// Benchmark for std::set (STL ordered set - Red-Black tree based)
364BenchmarkResult benchmark_std_set(const string& name, const string& category,
365 const vector<int>& data)
366{
367 BenchmarkResult result;
368 result.name = name;
369 result.category = category;
370
371 std::set<int> s;
372
373 // Insert
374 result.insert_ms = measure_ms([&]() {
375 for (int x : data)
376 s.insert(x);
377 });
378
379 // Search all using find()
380 result.search_ms = measure_ms([&]() {
381 for (int x : data)
382 {
383 auto it = s.find(x);
384 if (it == s.end())
385 cerr << "ERROR: " << x << " not found in " << name << endl;
386 }
387 });
388
389 // Remove all using erase()
390 result.remove_ms = measure_ms([&]() {
391 for (int x : data)
392 s.erase(x);
393 });
394
395 result.total_ms = result.insert_ms + result.search_ms + result.remove_ms;
396 result.memory_hint = data.size() * (sizeof(int) + 3 * sizeof(void*));
397
398 return result;
399}
400
401// Benchmark for std::unordered_set (STL hash set)
402BenchmarkResult benchmark_std_unordered_set(const string& name, const string& category,
403 const vector<int>& data)
404{
405 BenchmarkResult result;
406 result.name = name;
407 result.category = category;
408
409 std::unordered_set<int> s;
410
411 // Insert
412 result.insert_ms = measure_ms([&]() {
413 for (int x : data)
414 s.insert(x);
415 });
416
417 // Search all using find()
418 result.search_ms = measure_ms([&]() {
419 for (int x : data)
420 {
421 auto it = s.find(x);
422 if (it == s.end())
423 cerr << "ERROR: " << x << " not found in " << name << endl;
424 }
425 });
426
427 // Remove all using erase()
428 result.remove_ms = measure_ms([&]() {
429 for (int x : data)
430 s.erase(x);
431 });
432
433 result.total_ms = result.insert_ms + result.search_ms + result.remove_ms;
434 result.memory_hint = data.size() * sizeof(int) * 2;
435
436 return result;
437}
438
439// =============================================================================
440// Data Generation
441// =============================================================================
442
443vector<int> generate_random_data(size_t n, unsigned seed)
444{
445 mt19937 rng(seed);
446 uniform_int_distribution<int> dist(0, numeric_limits<int>::max());
447
448 vector<int> data;
449 data.reserve(n);
450 for (size_t i = 0; i < n; ++i)
451 data.push_back(dist(rng));
452
453 // Remove duplicates (sets don't allow them)
454 sort(data.begin(), data.end());
455 data.erase(unique(data.begin(), data.end()), data.end());
456
457 // Shuffle for random insertion order
458 shuffle(data.begin(), data.end(), rng);
459
460 return data;
461}
462
463vector<int> generate_sequential_data(size_t n)
464{
465 vector<int> data;
466 data.reserve(n);
467 for (size_t i = 0; i < n; ++i)
468 data.push_back(static_cast<int>(i));
469 return data;
470}
471
472// =============================================================================
473// Results Display
474// =============================================================================
475
477{
478 (void)n;
479 cout << "\n";
480 cout << "┌────────────────────┬─────────────┬────────────┬────────────┬────────────┬────────────┐\n";
481 cout << "│ Structure │ Category │ Insert(ms) │ Search(ms) │ Remove(ms) │ Total(ms) │\n";
482 cout << "├────────────────────┼─────────────┼────────────┼────────────┼────────────┼────────────┤\n";
483
484 string last_category;
485 for (const auto& r : results)
486 {
487 if (!last_category.empty() && r.category != last_category)
488 cout << "├────────────────────┼─────────────┼────────────┼────────────┼────────────┼────────────┤\n";
489 last_category = r.category;
490
491 cout << "│ " << setw(18) << left << r.name
492 << " │ " << setw(11) << r.category
493 << " │ " << setw(10) << right << fixed << setprecision(2) << r.insert_ms
494 << " │ " << setw(10) << r.search_ms
495 << " │ " << setw(10) << r.remove_ms
496 << " │ " << setw(10) << r.total_ms << " │\n";
497 }
498
499 cout << "└────────────────────┴─────────────┴────────────┴────────────┴────────────┴────────────┘\n";
500
501 // Find best in each category
502 cout << "\n▶ Best by Operation:\n";
503
504 auto find_best = [&](auto getter, const string& op_name) {
505 const BenchmarkResult* best = nullptr;
506 for (const auto& r : results)
507 if (!best || getter(r) < getter(*best))
508 best = &r;
509 if (best)
510 cout << " " << setw(10) << left << op_name << ": "
511 << best->name << " (" << fixed << setprecision(2) << getter(*best) << " ms)\n";
512 };
513
514 find_best([](const auto& r) { return r.insert_ms; }, "Insert");
515 find_best([](const auto& r) { return r.search_ms; }, "Search");
516 find_best([](const auto& r) { return r.remove_ms; }, "Remove");
517 find_best([](const auto& r) { return r.total_ms; }, "Overall");
518}
519
521{
522 cout << "\n▶ Operations per Second (thousands):\n\n";
523 cout << " Structure │ Insert K/s │ Search K/s │ Remove K/s\n";
524 cout << " ───────────────────┼─────────────┼─────────────┼────────────\n";
525
526 for (const auto& r : results)
527 {
528 double insert_kops = (n / r.insert_ms);
529 double search_kops = (n / r.search_ms);
530 double remove_kops = (n / r.remove_ms);
531
532 cout << " " << setw(18) << left << r.name << " │ "
533 << setw(11) << right << fixed << setprecision(0) << insert_kops << " │ "
534 << setw(11) << search_kops << " │ "
535 << setw(10) << remove_kops << "\n";
536 }
537}
538
539// =============================================================================
540// Main Benchmark
541// =============================================================================
542
543void run_full_benchmark(size_t n, unsigned seed, bool include_ranked)
544{
545 cout << "╔══════════════════════════════════════════════════════════════════════════════╗\n";
546 cout << "║ Set Data Structures Benchmark - Aleph-w ║\n";
547 cout << "╚══════════════════════════════════════════════════════════════════════════════╝\n\n";
548
549 cout << "Configuration:\n";
550 cout << " Elements: " << n << "\n";
551 cout << " Seed: " << seed << "\n";
552 cout << " Theoretical log₂(n) = " << fixed << setprecision(1) << log2(n) << "\n\n";
553
554 cout << "Generating random data... " << flush;
555 auto data = generate_random_data(n, seed);
556 cout << "done (" << data.size() << " unique elements)\n\n";
557
559
560 // ==========================================================================
561 // Tree-based sets
562 // ==========================================================================
563 cout << "Running Tree benchmarks...\n";
564
565 results.push_back(benchmark_tree_set<AvlSet>("AVL Tree", "Tree", data));
566 cout << " ✓ AVL Tree\n";
567
568 results.push_back(benchmark_tree_set<RbSet>("Red-Black Tree", "Tree", data));
569 cout << " ✓ Red-Black Tree\n";
570
571 results.push_back(benchmark_tree_set<SplaySet>("Splay Tree", "Tree", data));
572 cout << " ✓ Splay Tree\n";
573
574 results.push_back(benchmark_tree_set<TreapSet>("Treap", "Tree", data));
575 cout << " ✓ Treap\n";
576
577 results.push_back(benchmark_tree_set<RandSet>("Rand Tree", "Tree", data));
578 cout << " ✓ Rand Tree\n";
579
580 if (include_ranked)
581 {
582 results.push_back(benchmark_tree_set<AvlRkSet>("AVL Tree Rk", "Tree+Rank", data));
583 cout << " ✓ AVL Tree Rk\n";
584
585 results.push_back(benchmark_tree_set<TreapRkSet>("Treap Rk", "Tree+Rank", data));
586 cout << " ✓ Treap Rk\n";
587 }
588
589 // ==========================================================================
590 // Skip List
591 // ==========================================================================
592 cout << "Running Skip List benchmark...\n";
593
594 results.push_back(benchmark_skip_list("Skip List", "Skip List", data));
595 cout << " ✓ Skip List\n";
596
597 // ==========================================================================
598 // Hash-based sets
599 // ==========================================================================
600 cout << "Running Hash benchmarks...\n";
601
602 results.push_back(benchmark_hash_set<LhashSet>("DynSetLhash", "Hash Chain", data));
603 cout << " ✓ DynSetLhash (chaining)\n";
604
605 results.push_back(benchmark_hash_set<LinHashSet>("DynSetLinHash", "Hash Dyn", data));
606 cout << " ✓ DynSetLinHash (dynamic)\n";
607
608 results.push_back(benchmark_hash_set<ODHashSet>("SetODhash", "Hash Open", data));
609 cout << " ✓ SetODhash (double hash)\n";
610
611 results.push_back(benchmark_hash_set<OLHashSet>("SetOLhash", "Hash Open", data));
612 cout << " ✓ SetOLhash (linear probe)\n";
613
614 // ==========================================================================
615 // STL containers for comparison
616 // ==========================================================================
617 cout << "Running STL benchmarks...\n";
618
619 results.push_back(benchmark_std_set("std::set", "STL Tree", data));
620 cout << " ✓ std::set (RB tree)\n";
621
622 results.push_back(benchmark_std_unordered_set("std::unordered_set", "STL Hash", data));
623 cout << " ✓ std::unordered_set\n";
624
625 // ==========================================================================
626 // Display Results
627 // ==========================================================================
630
631 // ==========================================================================
632 // Summary
633 // ==========================================================================
634 cout << R"(
635━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
636Summary & Recommendations
637━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
638
639┌───────────────────────────────────────────────────────────────────────────────┐
640│ TREE-BASED (O(log n)) │
641│ ═══════════════════ │
642│ • AVL Tree: Strictest balance, best for read-heavy, deterministic │
643│ • Red-Black: Good all-around, used in std::set/map │
644│ • Splay Tree: Self-adjusting, great if same elements accessed often │
645│ • Treap/Rand: Randomized, simpler code, good average case │
646│ • Use when: Need ordered traversal, range queries, or worst-case O(log n)│
647├───────────────────────────────────────────────────────────────────────────────┤
648│ SKIP LIST (Expected O(log n)) │
649│ ═════════════════════════════ │
650│ • Simple probabilistic structure, no rotations needed │
651│ • Easy to make concurrent (though not implemented here) │
652│ • Use when: Want simplicity, or planning concurrent extension │
653├───────────────────────────────────────────────────────────────────────────────┤
654│ HASH TABLES (Expected O(1)) │
655│ ═══════════════════════════ │
656│ • DynSetLhash: Separate chaining - handles high load gracefully │
657│ • DynSetLinHash: Linear probing with expansion - good for varying sizes │
658│ • SetODhash: Double hashing - minimal clustering, cache-friendly │
659│ • SetOLhash: Linear probing - best cache locality, simple │
660│ • Use when: Speed is critical and order doesn't matter │
661│ │
662│ Hash Table Selection Guide: │
663│ • High insert/delete rate → DynSetLhash (chaining handles it well) │
664│ • Memory efficiency → SetODhash/SetOLhash (no pointers per element) │
665│ • Unknown final size → DynSetLinHash (expands automatically) │
666│ • Fixed size known → SetODhash (set size at construction) │
667└───────────────────────────────────────────────────────────────────────────────┘
668)";
669}
670
671// =============================================================================
672// Sequential Access Pattern Test
673// =============================================================================
674
675void run_sequential_test(size_t n)
676{
677 cout << "\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n";
678 cout << "Sequential Access Pattern (tests worst-case for some structures)\n";
679 cout << "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n";
680
681 cout << "Inserting " << n << " elements in sorted order...\n";
682 auto data = generate_sequential_data(n);
683
685
686 // Trees should handle this well (self-balancing)
687 results.push_back(benchmark_tree_set<AvlSet>("AVL Tree", "Tree", data));
688 results.push_back(benchmark_tree_set<RbSet>("Red-Black", "Tree", data));
689
690 // Skip list - probabilistic, should be OK
691 results.push_back(benchmark_skip_list("Skip List", "Skip", data));
692
693 // Hash tables - should be fine
694 results.push_back(benchmark_hash_set<LhashSet>("DynSetLhash", "Hash", data));
695 results.push_back(benchmark_hash_set<ODHashSet>("SetODhash", "Hash", data));
696
697 // STL containers
698 results.push_back(benchmark_std_set("std::set", "STL Tree", data));
699 results.push_back(benchmark_std_unordered_set("std::unordered_set", "STL Hash", data));
700
701 cout << "\nSequential insertion results:\n";
703
704 cout << "\nNote: Splay tree would be O(n²) for sequential insertion without\n";
705 cout << "subsequent accesses, as it doesn't rebalance until accessed.\n";
706}
707
708// =============================================================================
709// Main
710// =============================================================================
711
712int main(int argc, char* argv[])
713{
714 // Default parameters
715 size_t n = 1000000; // 1,000,000 keys by default
716 unsigned seed = 42;
717 bool include_ranked = false;
718 bool run_sequential = false;
719
720 // Simple argument parsing (no TCLAP dependency)
721 for (int i = 1; i < argc; ++i)
722 {
723 string arg = argv[i];
724 if ((arg == "-n" || arg == "--count") && i + 1 < argc)
725 n = stoull(argv[++i]);
726 else if ((arg == "-s" || arg == "--seed") && i + 1 < argc)
727 seed = stoul(argv[++i]);
728 else if (arg == "-r" || arg == "--ranked")
729 include_ranked = true;
730 else if (arg == "-q" || arg == "--sequential")
731 run_sequential = true;
732 else if (arg == "-h" || arg == "--help")
733 {
734 cout << "Set Data Structures Benchmark\n\n"
735 << "Usage: " << argv[0] << " [options]\n\n"
736 << "Options:\n"
737 << " -n, --count N Number of elements (default: 1000000)\n"
738 << " -s, --seed S Random seed (default: 42)\n"
739 << " -r, --ranked Include ranked tree variants\n"
740 << " -q, --sequential Also run sequential insertion test\n"
741 << " -h, --help Show this help\n";
742 return 0;
743 }
744 }
745
746 // Run main benchmark
748
749 // Optional sequential test
750 if (run_sequential)
751 run_sequential_test(min(n, size_t(50000))); // Limit sequential test size
752
753 cout << "\nDone.\n";
754 return 0;
755}
int main()
void empty() noexcept
empty the list
Definition htlist.H:1689
Dynamic set backed by balanced binary search trees with automatic memory management.
size_t remove(const Key &key)
Remove a key from the set.
Key * insert(const Key &key)
Insert an element into the set.
Key * search(const Key &key) const noexcept
Search for a key in the set.
size_t size() const noexcept
Count the number of elements of the list.
Definition htlist.H:1319
Open addressing hash table with double hashing collision resolution.
Definition tpl_odhash.H:180
Open addressing hash table with linear probing collision resolution.
Definition tpl_olhash.H:168
static mt19937 rng
__gmp_expr< T, __gmp_unary_expr< __gmp_expr< T, U >, __gmp_log2_function > > log2(const __gmp_expr< T, U > &expr)
Definition gmpfrxx.h:4064
__gmp_expr< typename __gmp_resolve_expr< T, V >::value_type, __gmp_binary_expr< __gmp_expr< T, U >, __gmp_expr< V, W >, __gmp_min_function > > min(const __gmp_expr< T, U > &expr1, const __gmp_expr< V, W > &expr2)
Definition gmpfrxx.h:4111
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
Itor unique(Itor __first, Itor __last, BinaryPredicate __binary_pred=BinaryPredicate())
Remove consecutive duplicates in place.
Definition ahAlgo.H:1058
DynArray< T > sort(const DynArray< T > &a, Cmp &&cmp=Cmp())
Returns a sorted copy of a DynArray.
Definition ahSort.H:227
auto shuffle(const C< T > &c)
Randomly shuffle a sequence.
DynList< T > maps(const C &c, Op op)
Classic map operation.
STL namespace.
void print_operations_per_second(const vector< BenchmarkResult > &results, size_t n)
vector< int > generate_random_data(size_t n, unsigned seed)
BenchmarkResult benchmark_std_unordered_set(const string &name, const string &category, const vector< int > &data)
void print_results_table(const vector< BenchmarkResult > &results, size_t n)
BenchmarkResult benchmark_hash_set(const string &name, const string &category, const vector< int > &data)
BenchmarkResult benchmark_skip_list(const string &name, const string &category, const vector< int > &data)
void run_sequential_test(size_t n)
BenchmarkResult benchmark_tree_set(const string &name, const string &category, const vector< int > &data)
BenchmarkResult benchmark_std_set(const string &name, const string &category, const vector< int > &data)
vector< int > generate_sequential_data(size_t n)
void run_full_benchmark(size_t n, unsigned seed, bool include_ranked)
double measure_ms(Func &&f)
Dynamic set implementations based on hash tables.
Dynamic set implementations based on balanced binary search trees.
Dynamic ordered set implemented with a Skip List.
Open addressing hash table with double hashing.
Open addressing hash table with linear probing.