Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
ah_parallel_example.cc
Go to the documentation of this file.
1
#include <ah-parallel.H>

#include <algorithm>
#include <atomic>
#include <chrono>
#include <cmath>
#include <functional>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <random>
#include <string>
#include <thread>
#include <vector>

using namespace Aleph;
using namespace std::chrono_literals;
95
// Helper to print section headers.
//
// Writes to std::cout: a blank line, a 67-column "+---+" rule, the title
// left-justified in a 63-column field between "| " and " |", the rule again,
// and a trailing blank line. Titles longer than 63 characters are printed in
// full (the field is a minimum width, not a limit).
//
// NOTE: std::left is a sticky manipulator, so std::cout remains left-adjusted
// after this call returns.
void print_header(const std::string& title)
{
  // Build the horizontal rule once; it is emitted above and below the title.
  const std::string rule = "+" + std::string(65, '-') + "+\n";

  std::cout << "\n";
  std::cout << rule;
  std::cout << "| " << std::left << std::setw(63) << title << " |\n";
  std::cout << rule << "\n";
}
104
105// =============================================================================
106// EXAMPLE 1: Parallel Map (pmaps)
107// =============================================================================
108//
109// pmaps applies a function to each element in parallel, returning a new vector.
110// This is the parallel equivalent of std::transform or Haskell's map.
111//
112// SIGNATURE:
113// pmaps<ResultT>(pool, container, func) → std::vector<ResultT>
114// pmaps(pool, container, func) → std::vector<auto>
115// pmaps(container, func, ParallelOptions) → std::vector<auto>
116//
117
119{
120 print_header("Example 1: Parallel Map (pmaps)");
121
122 std::cout << "GOAL: Transform a large dataset in parallel.\n\n";
123
124 ThreadPool pool(std::thread::hardware_concurrency());
126 options.pool = &pool;
127 options.min_size = 1024;
128 options.max_tasks = pool.num_threads() * 2;
129 std::cout << "Using ThreadPool with " << pool.num_threads() << " workers\n\n";
130
131 // Create input data: 1 million integers
132 std::vector<int> numbers(1000000);
133 std::iota(numbers.begin(), numbers.end(), 1);
134
135 std::cout << "Input: " << numbers.size() << " integers\n";
136
137 auto start = std::chrono::high_resolution_clock::now();
138
139 // Type deduction: returns vector<long long> because lambda returns long long
140 auto squares = pmaps(numbers, [](int x) {
141 return static_cast<long long>(x) * x;
142 }, options);
143
144 auto end = std::chrono::high_resolution_clock::now();
145 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
146
147 std::cout << "Output: " << squares.size() << " squared values\n\n";
148
149 // Show sample results
150 std::cout << "First 5 results: ";
151 for (size_t i = 0; i < 5; ++i)
152 std::cout << squares[i] << " ";
153 std::cout << "\n";
154
155 std::cout << "Last 5 results: ";
156 for (size_t i = squares.size() - 5; i < squares.size(); ++i)
157 std::cout << squares[i] << " ";
158 std::cout << "\n\n";
159
160 std::cout << "✓ Completed in " << ms << " ms\n";
161}
162
163// =============================================================================
164// EXAMPLE 2: Parallel Filter (pfilter)
165// =============================================================================
166//
167// pfilter selects elements satisfying a predicate, preserving order.
168// Elements are tested in parallel chunks, then merged sequentially.
169//
170
172{
173 print_header("Example 2: Parallel Filter (pfilter)");
174
175 std::cout << "GOAL: Find all prime numbers in a range using parallel filtering.\n\n";
176
177 ThreadPool pool(std::thread::hardware_concurrency());
178
179 // Generate numbers 2 to 100000
180 std::vector<int> candidates(99999);
181 std::iota(candidates.begin(), candidates.end(), 2);
182
183 std::cout << "Checking " << candidates.size() << " candidates for primality...\n";
184
185 // Primality test (intentionally slow for demo)
186 auto is_prime = [](int n) {
187 if (n < 2) return false;
188 if (n == 2) return true;
189 if (n % 2 == 0) return false;
190 for (int i = 3; i * i <= n; i += 2)
191 if (n % i == 0) return false;
192 return true;
193 };
194
195 auto start = std::chrono::high_resolution_clock::now();
196
197 auto primes = pfilter(pool, candidates, is_prime);
198
199 auto end = std::chrono::high_resolution_clock::now();
200 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
201
202 std::cout << "Found " << primes.size() << " primes\n\n";
203
204 // Show some primes
205 std::cout << "First 10: ";
206 for (size_t i = 0; i < 10 && i < primes.size(); ++i)
207 std::cout << primes[i] << " ";
208 std::cout << "\n";
209
210 std::cout << "Last 10: ";
211 for (size_t i = primes.size() > 10 ? primes.size() - 10 : 0; i < primes.size(); ++i)
212 std::cout << primes[i] << " ";
213 std::cout << "\n\n";
214
215 std::cout << "✓ Completed in " << ms << " ms\n";
216}
217
218// =============================================================================
219// EXAMPLE 3: Parallel Fold (pfoldl)
220// =============================================================================
221//
222// pfoldl reduces a container using a binary operation.
223// IMPORTANT: The operation must be ASSOCIATIVE for correct parallel results.
224//
225// Works by:
226// 1. Each chunk computes a partial result
227// 2. Partial results are combined
228//
229
231{
232 print_header("Example 3: Parallel Fold (pfoldl)");
233
234 std::cout << "GOAL: Compute sum and product of a large dataset in parallel.\n\n";
235
236 ThreadPool pool(std::thread::hardware_concurrency());
237
238 // Create data
239 std::vector<double> data(100000);
240 for (size_t i = 0; i < data.size(); ++i)
241 data[i] = 1.0 + 1.0 / (i + 1); // 2, 1.5, 1.333..., etc.
242
243 std::cout << "Data size: " << data.size() << " elements\n\n";
244
245 // SUM with pfoldl
246 auto sum = pfoldl(pool, data, 0.0, std::plus<double>());
247 std::cout << "Sum: " << std::fixed << std::setprecision(2) << sum << "\n";
248
249 // Using psum (convenience function)
250 auto sum2 = psum(pool, data);
251 std::cout << "Sum (psum): " << sum2 << "\n\n";
252
253 // CONCATENATION of strings (associative)
254 std::vector<std::string> words = {"Parallel", " ", "functional", " ",
255 "programming", " ", "is", " ", "powerful!"};
256
257 auto sentence = pfoldl(pool, words, std::string{}, std::plus<std::string>());
258 std::cout << "Concatenated: \"" << sentence << "\"\n\n";
259
260 std::cout << "✓ Fold operations completed\n";
261}
262
263// =============================================================================
264// EXAMPLE 4: Parallel Predicates (pall, pexists, pnone, pcount_if)
265// =============================================================================
266//
267// These functions test conditions on all elements:
268// - pall: true if ALL elements satisfy the predicate
269// - pexists: true if ANY element satisfies the predicate
270// - pnone: true if NO element satisfies the predicate
271// - pcount_if: counts elements satisfying the predicate
272//
273// pall and pexists use SHORT-CIRCUIT evaluation - they stop early when possible.
274//
275
277{
278 print_header("Example 4: Parallel Predicates");
279
280 std::cout << "GOAL: Test conditions on large datasets efficiently.\n\n";
281
282 ThreadPool pool(std::thread::hardware_concurrency());
283
284 // Create test data
285 std::vector<int> data(1000000);
286 std::iota(data.begin(), data.end(), 1); // 1, 2, 3, ..., 1000000
287
288 std::cout << "Dataset: integers 1 to " << data.size() << "\n\n";
289
290 // pall - All positive?
291 bool all_positive = pall(pool, data, [](int x) { return x > 0; });
292 std::cout << "All positive? " << (all_positive ? "YES" : "NO") << "\n";
293
294 // pall - All even? (will short-circuit on first odd)
295 bool all_even = pall(pool, data, [](int x) { return x % 2 == 0; });
296 std::cout << "All even? " << (all_even ? "YES" : "NO") << " (short-circuits early!)\n";
297
298 // pexists - Any divisible by 12345?
299 bool has_special = pexists(pool, data, [](int x) { return x % 12345 == 0; });
300 std::cout << "Has number divisible by 12345? " << (has_special ? "YES" : "NO") << "\n";
301
302 // pnone - No negatives?
303 bool no_negatives = pnone(pool, data, [](int x) { return x < 0; });
304 std::cout << "No negatives? " << (no_negatives ? "YES" : "NO") << "\n";
305
306 // pcount_if - Count multiples of 7
307 size_t sevens = pcount_if(pool, data, [](int x) { return x % 7 == 0; });
308 std::cout << "Multiples of 7: " << sevens << "\n\n";
309
310 std::cout << "✓ Predicate tests completed\n";
311}
312
313// =============================================================================
314// EXAMPLE 5: Parallel Find (pfind, pfind_value)
315// =============================================================================
316//
317// Parallel search with short-circuit optimization.
318// - pfind: returns std::optional<size_t> (index of first match)
319// - pfind_value: returns std::optional<T> (the matched element)
320//
321
323{
324 print_header("Example 5: Parallel Find (pfind, pfind_value)");
325
326 std::cout << "GOAL: Search for elements in parallel with early termination.\n\n";
327
328 ThreadPool pool(std::thread::hardware_concurrency());
329
330 // Create shuffled data
331 std::vector<int> data(1000000);
332 std::iota(data.begin(), data.end(), 0);
333 std::mt19937 rng(42);
334 std::shuffle(data.begin(), data.end(), rng);
335
336 std::cout << "Shuffled dataset of " << data.size() << " elements\n\n";
337
338 // Find index of value 500000
339 auto idx = pfind(pool, data, [](int x) { return x == 500000; });
340 if (idx)
341 std::cout << "Value 500000 found at index " << *idx << "\n";
342 else
343 std::cout << "Value 500000 not found\n";
344
345 // Find first value > 999990
346 auto val = pfind_value(pool, data, [](int x) { return x > 999990; });
347 if (val)
348 std::cout << "First value > 999990: " << *val << "\n";
349 else
350 std::cout << "No value > 999990\n";
351
352 // Search for non-existent value
353 auto missing = pfind(pool, data, [](int x) { return x == -1; });
354 std::cout << "Value -1: " << (missing ? "found" : "not found") << "\n\n";
355
356 std::cout << "✓ Search operations completed\n";
357}
358
359// =============================================================================
360// EXAMPLE 6: Parallel Aggregations (psum, pproduct, pmin, pmax, pminmax)
361// =============================================================================
362//
363// Convenience functions for common reductions.
364//
365
367{
368 print_header("Example 6: Parallel Aggregations");
369
370 std::cout << "GOAL: Compute statistics on large datasets in parallel.\n\n";
371
372 ThreadPool pool(std::thread::hardware_concurrency());
373
374 // Create random data
375 std::vector<double> data(500000);
376 std::mt19937 rng(123);
377 std::uniform_real_distribution<double> dist(-1000.0, 1000.0);
378 for (auto& x : data)
379 x = dist(rng);
380
381 std::cout << "Dataset: " << data.size() << " random doubles in [-1000, 1000]\n\n";
382
383 // Aggregations
384 auto sum = psum(pool, data);
385 auto min_opt = pmin(pool, data);
386 auto max_opt = pmax(pool, data);
387 auto minmax_opt = pminmax(pool, data);
388
389 std::cout << std::fixed << std::setprecision(4);
390 std::cout << "Sum: " << sum << "\n";
391
392 if (min_opt)
393 std::cout << "Min: " << *min_opt << "\n";
394 if (max_opt)
395 std::cout << "Max: " << *max_opt << "\n";
396 if (minmax_opt)
397 std::cout << "MinMax: (" << minmax_opt->first << ", " << minmax_opt->second << ")\n";
398
399 double mean = sum / data.size();
400 std::cout << "Mean: " << mean << "\n\n";
401
402 std::cout << "✓ Aggregation operations completed\n";
403}
404
405// =============================================================================
406// EXAMPLE 7: Parallel Sort (psort)
407// =============================================================================
408//
409// Parallel merge sort: chunks are sorted in parallel, then merged.
410//
411
413{
414 print_header("Example 7: Parallel Sort (psort)");
415
416 std::cout << "GOAL: Sort large datasets using parallel merge sort.\n\n";
417
418 ThreadPool pool(std::thread::hardware_concurrency());
420 options.pool = &pool;
421 options.min_size = 4096;
422 options.max_tasks = pool.num_threads();
423
424 // Create random data
425 std::vector<int> data(500000);
426 std::mt19937 rng(456);
427 for (auto& x : data)
428 x = rng() % 1000000;
429
430 std::cout << "Dataset: " << data.size() << " random integers\n";
431 std::cout << "First 10 (unsorted): ";
432 for (size_t i = 0; i < 10; ++i)
433 std::cout << data[i] << " ";
434 std::cout << "\n\n";
435
436 auto start = std::chrono::high_resolution_clock::now();
437
438 psort(data, std::less<int>{}, options); // In-place parallel sort
439
440 auto end = std::chrono::high_resolution_clock::now();
441 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
442
443 std::cout << "First 10 (sorted): ";
444 for (size_t i = 0; i < 10; ++i)
445 std::cout << data[i] << " ";
446 std::cout << "\n";
447
448 std::cout << "Last 10 (sorted): ";
449 for (size_t i = data.size() - 10; i < data.size(); ++i)
450 std::cout << data[i] << " ";
451 std::cout << "\n\n";
452
453 // Verify sorted
454 bool is_sorted = std::is_sorted(data.begin(), data.end());
455 std::cout << "Correctly sorted? " << (is_sorted ? "YES" : "NO") << "\n";
456 std::cout << "Time: " << ms << " ms\n\n";
457
458 std::cout << "✓ Parallel sort completed\n";
459}
460
461// =============================================================================
462// EXAMPLE 8: Parallel Zip Operations (2 containers)
463// =============================================================================
464//
465// pzip_for_each: Apply function to pairs of elements
466// pzip_maps: Map pairs to a new container
467// pzip_foldl: Reduce pairs (e.g., dot product)
468//
469
471{
472 print_header("Example 8: Parallel Zip Operations (2 containers)");
473
474 std::cout << "GOAL: Process corresponding elements from two containers.\n\n";
475
476 ThreadPool pool(std::thread::hardware_concurrency());
478 options.pool = &pool;
479 options.chunk_size = 4096;
480
481 // Create two vectors
482 std::vector<double> a(100000);
483 std::vector<double> b(100000);
484 for (size_t i = 0; i < a.size(); ++i)
485 {
486 a[i] = static_cast<double>(i);
487 b[i] = static_cast<double>(i) * 2;
488 }
489
490 std::cout << "Vectors a and b, each with " << a.size() << " elements\n\n";
491
492 // pzip_maps: Element-wise product
493 auto products = pzip_maps(a, b, [](double x, double y) {
494 return x * y;
495 }, options);
496
497 std::cout << "Element-wise products (first 5): ";
498 for (size_t i = 0; i < 5; ++i)
499 std::cout << products[i] << " ";
500 std::cout << "\n\n";
501
502 // pzip_foldl: Dot product
503 double dot_product = pzip_foldl(a, b, 0.0,
504 [](double acc, double x, double y) { return acc + x * y; }, options);
505
506 std::cout << "Dot product: " << std::fixed << std::setprecision(0)
507 << dot_product << "\n\n";
508
509 // pzip_for_each with side effects
510 std::atomic<double> sum{0};
511 pzip_for_each(a, b, [&sum](double x, double y) {
512 sum += x + y;
513 }, options);
514
515 std::cout << "Sum of all pairs: " << sum.load() << "\n\n";
516
517 std::cout << "✓ Zip operations completed\n";
518}
519
520// =============================================================================
521// EXAMPLE 9: Variadic Zip (N containers) - pzip_*_n functions
522// =============================================================================
523//
524// For 3+ containers, use the _n suffix functions:
525// - pzip_for_each_n
526// - pzip_maps_n
527// - pzip_foldl_n (requires a combiner function)
528// - pzip_all_n, pzip_exists_n, pzip_count_if_n
529//
530
532{
533 print_header("Example 9: Variadic Zip (N containers)");
534
535 std::cout << "GOAL: Process corresponding elements from 3+ containers.\n\n";
536
537 ThreadPool pool(std::thread::hardware_concurrency());
539 options.pool = &pool;
540 options.chunk_size = 2;
541
542 // Three vectors
543 std::vector<int> x = {1, 2, 3, 4, 5};
544 std::vector<int> y = {10, 20, 30, 40, 50};
545 std::vector<int> z = {100, 200, 300, 400, 500};
546
547 std::cout << "x = {1, 2, 3, 4, 5}\n";
548 std::cout << "y = {10, 20, 30, 40, 50}\n";
549 std::cout << "z = {100, 200, 300, 400, 500}\n\n";
550
551 // pzip_maps_n: Sum triplets
552 auto sums = pzip_maps_n([](int a, int b, int c) {
553 return a + b + c;
554 }, options, x, y, z);
555
556 std::cout << "x + y + z = ";
557 for (auto v : sums)
558 std::cout << v << " ";
559 std::cout << "\n\n";
560
561 // pzip_all_n: Check if all triplets satisfy condition
562 bool all_ordered = pzip_all_n([](int a, int b, int c) {
563 return a < b && b < c;
564 }, options, x, y, z);
565
566 std::cout << "All x[i] < y[i] < z[i]? " << (all_ordered ? "YES" : "NO") << "\n";
567
568 // pzip_count_if_n: Count triplets with sum > 100
569 size_t count = pzip_count_if_n([](int a, int b, int c) {
570 return a + b + c > 100;
571 }, options, x, y, z);
572
573 std::cout << "Triplets with sum > 100: " << count << "\n\n";
574
575 // Four vectors example
576 std::vector<double> v1 = {1.0, 2.0, 3.0};
577 std::vector<double> v2 = {1.0, 2.0, 3.0};
578 std::vector<double> v3 = {1.0, 2.0, 3.0};
579 std::vector<double> v4 = {1.0, 2.0, 3.0};
580
581 auto products = pzip_maps_n([](double a, double b, double c, double d) {
582 return a * b * c * d;
583 }, options, v1, v2, v3, v4);
584
585 std::cout << "v1 * v2 * v3 * v4 = ";
586 for (auto v : products)
587 std::cout << v << " ";
588 std::cout << "\n\n";
589
590 std::cout << "✓ Variadic zip operations completed\n";
591}
592
593// =============================================================================
594// EXAMPLE 10: Parallel Enumerate
595// =============================================================================
596//
597// Like Python's enumerate(), but parallel:
598// - penumerate_for_each: Apply function to (index, element) pairs
599// - penumerate_maps: Map (index, element) pairs to results
600//
601
603{
604 print_header("Example 10: Parallel Enumerate");
605
606 std::cout << "GOAL: Process elements along with their indices in parallel.\n\n";
607
608 ThreadPool pool(std::thread::hardware_concurrency());
610 options.pool = &pool;
611 options.chunk_size = 4;
612
613 // Initialize vector with indices
614 std::vector<int> data(10, 0);
615
616 // penumerate_for_each: Set each element to its index * 10
617 penumerate_for_each(data, [](size_t i, int& x) {
618 x = static_cast<int>(i * 10);
619 }, options);
620
621 std::cout << "After penumerate_for_each (x = i * 10): ";
622 for (auto x : data)
623 std::cout << x << " ";
624 std::cout << "\n\n";
625
626 // penumerate_maps: Create indexed strings
627 std::vector<std::string> words = {"apple", "banana", "cherry", "date", "elderberry"};
628
629 auto indexed = penumerate_maps(words,
630 [](size_t i, const std::string& s) {
631 return "[" + std::to_string(i) + "] " + s;
632 }, options);
633
634 std::cout << "Indexed strings:\n";
635 for (const auto& s : indexed)
636 std::cout << " " << s << "\n";
637 std::cout << "\n";
638
639 std::cout << "✓ Enumerate operations completed\n";
640}
641
642// =============================================================================
643// EXAMPLE 11: Scan / Merge / Partition Wrappers
644// =============================================================================
645//
646// These wrappers expose the foundational primitives from thread_pool.H using the
647// same container-oriented style as the rest of ah-parallel.H.
648//
649
651{
652 print_header("Example 11: Scan / Merge / Partition Wrappers");
653
654 std::cout << "GOAL: Use container-level wrappers for scan, merge, and partition.\n\n";
655
656 ThreadPool pool(std::thread::hardware_concurrency());
658 options.pool = &pool;
659 options.chunk_size = 4;
660
661 std::vector<int> values = {1, 2, 3, 4, 5, 6};
662 auto inclusive = pscan(values, std::plus<int>{}, options);
663 auto exclusive = pexclusive_scan(values, 0, std::plus<int>{}, options);
664
665 std::cout << "Inclusive scan: ";
666 for (int x : inclusive)
667 std::cout << x << " ";
668 std::cout << "\nExclusive scan: ";
669 for (int x : exclusive)
670 std::cout << x << " ";
671 std::cout << "\n\n";
672
673 std::vector<int> left = {1, 3, 5, 7};
674 std::vector<int> right = {2, 4, 6, 8};
675 auto merged = pmerge(left, right, std::less<int>{}, options);
676
677 std::cout << "Merged sorted ranges: ";
678 for (int x : merged)
679 std::cout << x << " ";
680 std::cout << "\n\n";
681
682 auto [evens, odds] = ppartition(values, [](int x) { return x % 2 == 0; }, options);
683 std::cout << "Partitioned evens: ";
684 for (int x : evens)
685 std::cout << x << " ";
686 std::cout << "\nPartitioned odds: ";
687 for (int x : odds)
688 std::cout << x << " ";
689 std::cout << "\n\n";
690
691 std::cout << "✓ Scan / merge / partition wrappers completed\n";
692}
693
694// =============================================================================
695// EXAMPLE 12: Performance Comparison
696// =============================================================================
697
699{
700 print_header("Example 12: Performance Comparison");
701
702 std::cout << "GOAL: Compare parallel vs sequential execution times.\n\n";
703
704 ThreadPool pool(std::thread::hardware_concurrency());
705
706 // Large dataset
707 std::vector<double> data(2000000);
708 std::iota(data.begin(), data.end(), 1.0);
709
710 std::cout << "Dataset: " << data.size() << " elements\n";
711 std::cout << "Threads: " << pool.num_threads() << "\n\n";
712
713 // CPU-intensive operation
714 auto expensive = [](double x) {
715 double result = x;
716 for (int i = 0; i < 50; ++i)
717 result = std::sin(result) * std::cos(result) + std::sqrt(std::abs(result));
718 return result;
719 };
720
721 // SEQUENTIAL
722 std::cout << "Running sequential map...\n";
723 auto seq_start = std::chrono::high_resolution_clock::now();
724
725 std::vector<double> seq_result(data.size());
726 std::transform(data.begin(), data.end(), seq_result.begin(), expensive);
727
728 auto seq_end = std::chrono::high_resolution_clock::now();
729 auto seq_ms = std::chrono::duration_cast<std::chrono::milliseconds>(seq_end - seq_start).count();
730
731 // PARALLEL
732 std::cout << "Running parallel map (pmaps)...\n\n";
733 auto par_start = std::chrono::high_resolution_clock::now();
734
735 auto par_result = pmaps(pool, data, expensive);
736
737 auto par_end = std::chrono::high_resolution_clock::now();
738 auto par_ms = std::chrono::duration_cast<std::chrono::milliseconds>(par_end - par_start).count();
739
740 // Results
741 double speedup = (par_ms > 0) ? static_cast<double>(seq_ms) / par_ms : 0;
742
743 std::cout << "┌────────────────────────────────────────┐\n";
744 std::cout << "│ PERFORMANCE RESULTS │\n";
745 std::cout << "├────────────────────────────────────────┤\n";
746 std::cout << "│ Sequential: " << std::setw(20) << seq_ms << " ms │\n";
747 std::cout << "│ Parallel: " << std::setw(20) << par_ms << " ms │\n";
748 std::cout << "├────────────────────────────────────────┤\n";
749 std::cout << "│ SPEEDUP: " << std::setw(20) << std::fixed
750 << std::setprecision(2) << speedup << "x │\n";
751 std::cout << "└────────────────────────────────────────┘\n";
752
753 // Verify correctness
754 bool match = std::equal(seq_result.begin(), seq_result.end(), par_result.begin(),
755 [](double a, double b) { return std::abs(a - b) < 1e-10; });
756 std::cout << "\n✓ Results match: " << (match ? "YES" : "NO") << "\n";
757}
758
759// =============================================================================
760// MAIN
761// =============================================================================
762
763int main()
764{
765 std::cout << "\n";
766 std::cout << "╔════════════════════════════════════════════════════════════════════╗\n";
767 std::cout << "║ ║\n";
768 std::cout << "║ ALEPH-W PARALLEL FUNCTIONAL PROGRAMMING EXAMPLES ║\n";
769 std::cout << "║ ║\n";
770 std::cout << "║ ML-style operations (map, filter, fold, zip, etc.) ║\n";
771 std::cout << "║ accelerated with multi-threading via ThreadPool ║\n";
772 std::cout << "║ ║\n";
773 std::cout << "╚════════════════════════════════════════════════════════════════════╝\n";
774
775 std::cout << "\nThis program demonstrates 12 parallel functional programming patterns.\n";
776 std::cout << "Read the source code comments for detailed explanations.\n";
777
790
791 std::cout << "\n";
792 std::cout << "╔════════════════════════════════════════════════════════════════════╗\n";
793 std::cout << "║ ✓ ALL EXAMPLES COMPLETED SUCCESSFULLY ║\n";
794 std::cout << "║ ║\n";
795 std::cout << "║ QUICK REFERENCE: ║\n";
796 std::cout << "║ pmaps(pool, c, f) → parallel map ║\n";
797 std::cout << "║ pfilter(pool, c, pred) → parallel filter ║\n";
798 std::cout << "║ pfoldl(pool, c, init, op) → parallel fold ║\n";
799 std::cout << "║ pfor_each(pool, c, f) → parallel for_each ║\n";
800 std::cout << "║ pall/pexists/pnone → parallel predicates ║\n";
801 std::cout << "║ pfind/pfind_value → parallel search ║\n";
802 std::cout << "║ psum/pproduct/pmin/pmax → parallel aggregations ║\n";
803 std::cout << "║ ppartition → parallel stable partition ║\n";
804 std::cout << "║ pscan/pexclusive_scan → parallel prefix scans ║\n";
805 std::cout << "║ pmerge → parallel merge of sorted inputs ║\n";
806 std::cout << "║ psort → parallel merge sort ║\n";
807 std::cout << "║ pzip_* → parallel zip (2 containers) ║\n";
808 std::cout << "║ pzip_*_n → parallel zip (N containers) ║\n";
809 std::cout << "║ penumerate_* → parallel enumerate ║\n";
810 std::cout << "╚════════════════════════════════════════════════════════════════════╝\n\n";
811
812 return 0;
813}
Parallel functional programming operations using ThreadPool.
void example_parallel_filter()
void example_parallel_sort()
void example_parallel_map()
void example_parallel_find()
void example_parallel_zip()
void example_parallel_aggregations()
void example_parallel_predicates()
void example_performance_comparison()
void example_variadic_zip()
void example_parallel_fold()
void example_parallel_scan_merge_partition()
void example_parallel_enumerate()
int main()
static size_t primes[]
A reusable thread pool for efficient parallel task execution.
size_t num_threads() const noexcept
Get the number of worker threads.
static mt19937 rng
static mpfr_t y
Definition mpfr_mul_d.c:3
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
bool pall(ThreadPool &pool, const Container &c, Pred pred, size_t chunk_size=0)
Parallel all predicate (short-circuit).
bool is_sorted(const Container< T > &cont, const Compare &cmp=Compare())
Check if a container is sorted in ascending order.
bool pnone(ThreadPool &pool, const Container &c, Pred pred, size_t chunk_size=0)
Parallel none predicate.
auto pmaps(ThreadPool &pool, const Container &c, Op op, size_t chunk_size=0)
Parallel map operation.
auto pmin(ThreadPool &pool, const Container &c, size_t chunk_size=0)
Parallel minimum element.
T pzip_foldl(ThreadPool &pool, const Container1 &c1, const Container2 &c2, T init, Op op, size_t chunk_size=0)
Parallel zip + fold.
void pzip_for_each(ThreadPool &pool, const Container1 &c1, const Container2 &c2, Op op, size_t chunk_size=0)
Parallel zip + for_each.
auto penumerate_maps(ThreadPool &pool, const Container &c, Op op, size_t chunk_size=0)
Parallel enumerate with map.
void penumerate_for_each(ThreadPool &pool, Container &c, Op op, size_t chunk_size=0)
Parallel for_each with index (enumerate).
bool pzip_all_n(ThreadPool &pool, Pred pred, const Containers &... cs)
Parallel all predicate over N zipped containers (variadic).
size_t pcount_if(ThreadPool &pool, const Container &c, Pred pred, size_t chunk_size=0)
Parallel count_if operation.
auto pexclusive_scan(ThreadPool &pool, const Container &c, T init, BinaryOp op, size_t chunk_size=0)
Parallel exclusive scan over a container.
std::optional< size_t > pfind(ThreadPool &pool, const Container &c, Pred pred, size_t chunk_size=0)
Parallel find operation (returns index).
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
void psort(ThreadPool &pool, Container &c, Compare cmp=Compare{}, const size_t min_parallel_size=1024)
Parallel sort (in-place).
auto pfilter(ThreadPool &pool, const Container &c, Pred pred, size_t chunk_size=0)
Parallel filter operation.
auto ppartition(ThreadPool &pool, const Container &c, Pred pred, size_t chunk_size=0)
Parallel partition (stable).
auto pmerge(ThreadPool &pool, const Container1 &c1, const Container2 &c2, Compare comp=Compare{}, size_t chunk_size=0)
Parallel merge of two sorted containers.
auto mean(const Container &data) -> std::decay_t< decltype(*std::begin(data))>
Compute the arithmetic mean.
Definition stat_utils.H:183
auto pmax(ThreadPool &pool, const Container &c, size_t chunk_size=0)
Parallel maximum element.
auto pzip_maps_n(ThreadPool &pool, Op op, const Containers &... cs)
Parallel map over N zipped containers (variadic).
auto pscan(ThreadPool &pool, const Container &c, BinaryOp op, size_t chunk_size=0)
Parallel inclusive scan over a container.
T pfoldl(ThreadPool &pool, const Container &c, T init, BinaryOp op, size_t chunk_size=0)
Parallel left fold (reduce).
auto pfind_value(ThreadPool &pool, const Container &c, Pred pred, size_t chunk_size=0)
Parallel find with value return.
auto pzip_maps(ThreadPool &pool, const Container1 &c1, const Container2 &c2, Op op, size_t chunk_size=0)
Parallel zip + map.
T psum(ThreadPool &pool, const Container &c, T init=T{}, size_t chunk_size=0)
Parallel sum of elements.
auto pminmax(ThreadPool &pool, const Container &c, size_t chunk_size=0)
Parallel min and max elements.
size_t pzip_count_if_n(ThreadPool &pool, Pred pred, const Containers &... cs)
Parallel count over N zipped containers (variadic).
bool pexists(ThreadPool &pool, const Container &c, Pred pred, size_t chunk_size=0)
Parallel exists predicate (short-circuit).
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
T sum(const Container &container, const T &init=T{})
Compute sum of all elements.
static struct argp_option options[]
Definition ntreepic.C:1886
void print_header()
Common configuration object for parallel algorithms.
ThreadPool * pool
Executor to use (nullptr = default_pool()).
bool is_prime(int n)