182#include <unordered_set>
196using namespace Aleph;
238template <
typename Func>
241 auto start = chrono::high_resolution_clock::now();
243 auto end = chrono::high_resolution_clock::now();
244 return chrono::duration<double, milli>(end - start).count();
248template <
typename Set>
250 const vector<int>& data)
268 auto* p = set.search(x);
270 cerr <<
"ERROR: " << x <<
" not found in " << name <<
endl;
281 result.
memory_hint = data.size() * (
sizeof(
int) + 3 *
sizeof(
void*));
287template <
typename Set>
289 const vector<int>& data)
307 auto* p = set.search(x);
309 cerr <<
"ERROR: " << x <<
" not found in " << name <<
endl;
327 const vector<int>& data)
347 cerr <<
"ERROR: " << x <<
" not found in " << name <<
endl;
365 const vector<int>& data)
385 cerr <<
"ERROR: " << x <<
" not found in " << name <<
endl;
396 result.
memory_hint = data.size() * (
sizeof(
int) + 3 *
sizeof(
void*));
403 const vector<int>& data)
409 std::unordered_set<int> s;
423 cerr <<
"ERROR: " << x <<
" not found in " << name <<
endl;
450 for (
size_t i = 0; i < n; ++i)
451 data.push_back(dist(
rng));
454 sort(data.begin(), data.end());
455 data.erase(
unique(data.begin(), data.end()), data.end());
467 for (
size_t i = 0; i < n; ++i)
468 data.push_back(
static_cast<int>(i));
480 cout <<
"┌────────────────────┬─────────────┬────────────┬────────────┬────────────┬────────────┐\n";
481 cout <<
"│ Structure │ Category │ Insert(ms) │ Search(ms) │ Remove(ms) │ Total(ms) │\n";
482 cout <<
"├────────────────────┼─────────────┼────────────┼────────────┼────────────┼────────────┤\n";
488 cout <<
"├────────────────────┼─────────────┼────────────┼────────────┼────────────┼────────────┤\n";
491 cout <<
"│ " <<
setw(18) << left << r.name
492 <<
" │ " <<
setw(11) << r.category
494 <<
" │ " <<
setw(10) << r.search_ms
495 <<
" │ " <<
setw(10) << r.remove_ms
496 <<
" │ " <<
setw(10) << r.total_ms <<
" │\n";
499 cout <<
"└────────────────────┴─────────────┴────────────┴────────────┴────────────┴────────────┘\n";
502 cout <<
"\n▶ Best by Operation:\n";
514 find_best([](
const auto& r) {
return r.insert_ms; },
"Insert");
515 find_best([](
const auto& r) {
return r.search_ms; },
"Search");
516 find_best([](
const auto& r) {
return r.remove_ms; },
"Remove");
517 find_best([](
const auto& r) {
return r.total_ms; },
"Overall");
522 cout <<
"\n▶ Operations per Second (thousands):\n\n";
523 cout <<
" Structure │ Insert K/s │ Search K/s │ Remove K/s\n";
524 cout <<
" ───────────────────┼─────────────┼─────────────┼────────────\n";
532 cout <<
" " <<
setw(18) << left << r.name <<
" │ "
545 cout <<
"╔══════════════════════════════════════════════════════════════════════════════╗\n";
546 cout <<
"║ Set Data Structures Benchmark - Aleph-w ║\n";
547 cout <<
"╚══════════════════════════════════════════════════════════════════════════════╝\n\n";
549 cout <<
"Configuration:\n";
550 cout <<
" Elements: " << n <<
"\n";
551 cout <<
" Seed: " << seed <<
"\n";
554 cout <<
"Generating random data... " << flush;
556 cout <<
"done (" << data.
size() <<
" unique elements)\n\n";
563 cout <<
"Running Tree benchmarks...\n";
566 cout <<
" ✓ AVL Tree\n";
569 cout <<
" ✓ Red-Black Tree\n";
572 cout <<
" ✓ Splay Tree\n";
575 cout <<
" ✓ Treap\n";
578 cout <<
" ✓ Rand Tree\n";
583 cout <<
" ✓ AVL Tree Rk\n";
586 cout <<
" ✓ Treap Rk\n";
592 cout <<
"Running Skip List benchmark...\n";
595 cout <<
" ✓ Skip List\n";
600 cout <<
"Running Hash benchmarks...\n";
603 cout <<
" ✓ DynSetLhash (chaining)\n";
606 cout <<
" ✓ DynSetLinHash (dynamic)\n";
609 cout <<
" ✓ SetODhash (double hash)\n";
612 cout <<
" ✓ SetOLhash (linear probe)\n";
617 cout <<
"Running STL benchmarks...\n";
620 cout <<
" ✓ std::set (RB tree)\n";
623 cout <<
" ✓ std::unordered_set\n";
635━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
636Summary & Recommendations
637━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
639┌───────────────────────────────────────────────────────────────────────────────┐
640│ TREE-BASED (O(log n)) │
641│ ═══════════════════ │
642│ • AVL Tree: Strictest balance, best for read-heavy, deterministic │
643│ • Red-Black: Good all-around, used in std::set/map │
644│ • Splay Tree: Self-adjusting, great if same elements accessed often │
645│ • Treap/Rand: Randomized, simpler code, good average case │
646│ • Use when: Need ordered traversal, range queries, or worst-case O(log n)│
647├───────────────────────────────────────────────────────────────────────────────┤
648│ SKIP LIST (Expected O(log n)) │
649│ ═════════════════════════════ │
650│ • Simple probabilistic structure, no rotations needed │
651│ • Easy to make concurrent (though not implemented here) │
652│ • Use when: Want simplicity, or planning concurrent extension │
653├───────────────────────────────────────────────────────────────────────────────┤
654│ HASH TABLES (Expected O(1)) │
655│ ═══════════════════════════ │
656│ • DynSetLhash: Separate chaining - handles high load gracefully │
657│ • DynSetLinHash: Linear probing with expansion - good for varying sizes │
658│ • SetODhash: Double hashing - minimal clustering, cache-friendly │
659│ • SetOLhash: Linear probing - best cache locality, simple │
660│ • Use when: Speed is critical and order doesn't matter │
662│ Hash Table Selection Guide: │
663│ • High insert/delete rate → DynSetLhash (chaining handles it well) │
664│ • Memory efficiency → SetODhash/SetOLhash (no pointers per element) │
665│ • Unknown final size → DynSetLinHash (expands automatically) │
666│ • Fixed size known → SetODhash (set size at construction) │
667└───────────────────────────────────────────────────────────────────────────────┘
677 cout <<
"\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n";
678 cout <<
"Sequential Access Pattern (tests worst-case for some structures)\n";
679 cout <<
"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n";
681 cout <<
"Inserting " << n <<
" elements in sorted order...\n";
701 cout <<
"\nSequential insertion results:\n";
704 cout <<
"\nNote: Splay tree would be O(n²) for sequential insertion without\n";
705 cout <<
"subsequent accesses, as it doesn't rebalance until accessed.\n";
721 for (
int i = 1; i <
argc; ++i)
724 if ((
arg ==
"-n" ||
arg ==
"--count") && i + 1 <
argc)
726 else if ((
arg ==
"-s" ||
arg ==
"--seed") && i + 1 <
argc)
728 else if (
arg ==
"-r" ||
arg ==
"--ranked")
730 else if (
arg ==
"-q" ||
arg ==
"--sequential")
732 else if (
arg ==
"-h" ||
arg ==
"--help")
734 cout <<
"Set Data Structures Benchmark\n\n"
735 <<
"Usage: " <<
argv[0] <<
" [options]\n\n"
737 <<
" -n, --count N Number of elements (default: 1000000)\n"
738 <<
" -s, --seed S Random seed (default: 42)\n"
739 <<
" -r, --ranked Include ranked tree variants\n"
740 <<
" -q, --sequential Also run sequential insertion test\n"
741 <<
" -h, --help Show this help\n";
void empty() noexcept
empty the list
Dynamic set backed by balanced binary search trees with automatic memory management.
size_t remove(const Key &key)
Remove a key from the set.
Key * insert(const Key &key)
Insert an element into the set.
Key * search(const Key &key) const noexcept
Search for a key in the set.
size_t size() const noexcept
Count the number of elements of the list.
Open addressing hash table with double hashing collision resolution.
Open addressing hash table with linear probing collision resolution.
__gmp_expr< T, __gmp_unary_expr< __gmp_expr< T, U >, __gmp_log2_function > > log2(const __gmp_expr< T, U > &expr)
__gmp_expr< typename __gmp_resolve_expr< T, V >::value_type, __gmp_binary_expr< __gmp_expr< T, U >, __gmp_expr< V, W >, __gmp_min_function > > min(const __gmp_expr< T, U > &expr1, const __gmp_expr< V, W > &expr2)
Main namespace for Aleph-w library functions.
Itor unique(Itor __first, Itor __last, BinaryPredicate __binary_pred=BinaryPredicate())
Remove consecutive duplicates in place.
DynArray< T > sort(const DynArray< T > &a, Cmp &&cmp=Cmp())
Returns a sorted copy of a DynArray.
auto shuffle(const C< T > &c)
Randomly shuffle a sequence.
DynList< T > maps(const C &c, Op op)
Classic map operation.
void print_operations_per_second(const vector< BenchmarkResult > &results, size_t n)
vector< int > generate_random_data(size_t n, unsigned seed)
BenchmarkResult benchmark_std_unordered_set(const string &name, const string &category, const vector< int > &data)
void print_results_table(const vector< BenchmarkResult > &results, size_t n)
BenchmarkResult benchmark_hash_set(const string &name, const string &category, const vector< int > &data)
BenchmarkResult benchmark_skip_list(const string &name, const string &category, const vector< int > &data)
void run_sequential_test(size_t n)
BenchmarkResult benchmark_tree_set(const string &name, const string &category, const vector< int > &data)
BenchmarkResult benchmark_std_set(const string &name, const string &category, const vector< int > &data)
vector< int > generate_sequential_data(size_t n)
void run_full_benchmark(size_t n, unsigned seed, bool include_ranked)
double measure_ms(Func &&f)
Dynamic set implementations based on hash tables.
Dynamic set implementations based on balanced binary search trees.
Dynamic ordered set implemented with a Skip List.
Open addressing hash table with double hashing.
Open addressing hash table with linear probing.