From 9072200cb721fd468e9eb01d5283697e81f0c1a9 Mon Sep 17 00:00:00 2001 From: Angel Pons Date: Tue, 16 Dec 2025 16:39:29 +0100 Subject: [PATCH 1/6] 6_Example_PackingProblem: Reformat files Signed-off-by: Angel Pons --- 6_Example_PackingProblem/main.cpp | 172 ++++++++++----------- 6_Example_PackingProblem/seqvsparallel.cpp | 114 +++++++------- 2 files changed, 140 insertions(+), 146 deletions(-) diff --git a/6_Example_PackingProblem/main.cpp b/6_Example_PackingProblem/main.cpp index 7f5a794..81efde2 100644 --- a/6_Example_PackingProblem/main.cpp +++ b/6_Example_PackingProblem/main.cpp @@ -9,99 +9,97 @@ using namespace std; using namespace oneapi; -vector doMAP(int a,int x[], int n) +vector doMAP(int a, int x[], int n) { - vector out(n); - - tbb::parallel_for( - tbb::blocked_range(0, n), - - // lambda function - [&](tbb::blocked_range r) { - for (auto i = r.begin(); i != r.end(); i++) { - out[i] = x[i]>=a; - } - } - - ); - return out; + vector out(n); + + tbb::parallel_for( + tbb::blocked_range(0, n), + + // lambda function + [&](tbb::blocked_range r) { + for (auto i = r.begin(); i != r.end(); i++) { + out[i] = x[i] >= a; + } + } + ); + return out; } - -int doSCAN(int out[], const int in[],int n){ - int total_sum = tbb::parallel_scan( - tbb::blocked_range(0, n), //range - 0, //id - [&](tbb::blocked_range r, int sum, bool is_final_scan){ - int tmp = sum; - for (int i = r.begin(); i < r.end(); ++i) { - tmp = tmp + in[i]; - if (is_final_scan) - out[i] = tmp; - } - return tmp; - }, - [&]( int left, int right ) { - return left + right; - } - ); - return total_sum; +int doSCAN(int out[], const int in[], int n) +{ + int total_sum = tbb::parallel_scan( + tbb::blocked_range(0, n), // range + 0, // id + [&](tbb::blocked_range r, int sum, bool is_final_scan) { + int tmp = sum; + for (int i = r.begin(); i < r.end(); ++i) { + tmp = tmp + in[i]; + if (is_final_scan) { + out[i] = tmp; + } + } + return tmp; + }, + [&](int left, int right) { + return left + right; + } + ); + return 
total_sum; } -//doMAPFilter(&out[0],&ix[0],&x[0],&filter_results[0], x.size()) -void doMAPFilter(int bolMatch[], int ixMatch[],int x[], int out[], int n){ - tbb::parallel_for( - tbb::blocked_range(0, n), - // lambda function - [&](tbb::blocked_range r) { - for (auto i = r.begin(); i < r.end(); i++) { - if (bolMatch[i]){ - out[ixMatch[i]-1] = x[i]; - } - } - } - ); +// doMAPFilter(&out[0],&ix[0],&x[0],&filter_results[0], x.size()) +void doMAPFilter(int bolMatch[], int ixMatch[], int x[], int out[], int n) +{ + tbb::parallel_for( + tbb::blocked_range(0, n), + // lambda function + [&](tbb::blocked_range r) { + for (auto i = r.begin(); i < r.end(); i++) { + if (bolMatch[i]) { + out[ixMatch[i] - 1] = x[i]; + } + } + } + ); } -int main(){ - - static int a = 10; - static vector x{7,1,0,13,0,15,20,-1}; +int main() +{ + static int a = 10; + static vector x{7, 1, 0, 13, 0, 15, 20, -1}; tbb::tick_count t0 = tbb::tick_count::now(); - - //MAP operation - vector bolMatch = doMAP(a,&x[0], x.size()); - - cout << "Map vector: "<< endl; - for (int i: bolMatch){ - cout << i << ','; - } - cout << endl; - - //SCAN - vector ixMatch(x.size()); - int sum = doSCAN(&ixMatch[0], &bolMatch[0], x.size()); //get index order - - cout << "Scan vector: " << sum << endl; - for (int i: ixMatch){ - cout << i << ','; - } - cout << endl; - - //JOIN - vector filtered_results(sum); - doMAPFilter(&bolMatch[0],&ixMatch[0],&x[0],&filtered_results[0], x.size()); - - - cout << "Filtered vector: " << endl; - for (int i: filtered_results){ - cout << i << ','; - } - cout << endl; - - cout << "\nTime: " << (tbb::tick_count::now()-t0).seconds() << "seconds" << endl; - - return 0; - - } + + // MAP operation + vector bolMatch = doMAP(a,&x[0], x.size()); + + cout << "Map vector: "<< endl; + for (int i : bolMatch) { + cout << i << ','; + } + cout << endl; + + // SCAN + vector ixMatch(x.size()); + int sum = doSCAN(&ixMatch[0], &bolMatch[0], x.size()); // get index order + + cout << "Scan vector: " << sum << endl; + for 
(int i : ixMatch) { + cout << i << ','; + } + cout << endl; + + // JOIN + vector filtered_results(sum); + doMAPFilter(&bolMatch[0], &ixMatch[0], &x[0], &filtered_results[0], x.size()); + + cout << "Filtered vector: " << endl; + for (int i : filtered_results) { + cout << i << ','; + } + cout << endl; + + cout << "\nTime: " << (tbb::tick_count::now() - t0).seconds() << "seconds" << endl; + return 0; +} diff --git a/6_Example_PackingProblem/seqvsparallel.cpp b/6_Example_PackingProblem/seqvsparallel.cpp index c7c9e49..92c35bf 100644 --- a/6_Example_PackingProblem/seqvsparallel.cpp +++ b/6_Example_PackingProblem/seqvsparallel.cpp @@ -15,91 +15,87 @@ using namespace tbb; template class Body { - T reduced_result; - T* const y; - const T* const x; + T reduced_result; + T* const y; + const T* const x; - public: +public: + Body(T y_[], const T x_[]) : reduced_result(0), x(x_), y(y_) {} - Body( T y_[], const T x_[] ) : reduced_result(0), x(x_), y(y_) {} + T get_reduced_result() const { return reduced_result; } - T get_reduced_result() const {return reduced_result;} + template + void operator()(const blocked_range& r, Tag) + { + T temp = reduced_result; - template - void operator()( const blocked_range& r, Tag ) - { - T temp = reduced_result; + for (int i = r.begin(); i < r.end(); ++i) { + temp = temp + x[i]; + if (Tag::is_final_scan()) { + y[i] = temp; + } + } - for( int i=r.begin(); i -float DoParallelScan( T y[], const T x[], int n) +float DoParallelScan(T y[], const T x[], int n) { - Body body(y,x); - tick_count t1,t2,t3,t4; - t1=tick_count::now(); - parallel_scan( blocked_range(0,n), body , auto_partitioner() ); - t2=tick_count::now(); - cout<<"Time Taken for parallel scan is \t"<<(t2-t1).seconds()< body(y, x); + tick_count t1, t2, t3, t4; + t1 = tick_count::now(); + parallel_scan(blocked_range(0, n), body, auto_partitioner()); + t2 = tick_count::now(); + cout << "Time Taken for parallel scan is \t" << (t2 - t1).seconds() << endl; + return body.get_reduced_result(); } 
template float SerialScan(T1 y[], const T1 x[], int n) { - tick_count t3,t4; - - t3=tick_count::now(); - T1 temp = 10; - - for( int i=1; i Date: Tue, 20 Jan 2026 14:02:01 +0100 Subject: [PATCH 2/6] 6_Example_PackingProblem: Overhaul seqvsparallel.cpp Remove unnecessary variables, make naming consistent, fix stdout messages getting mixed up, add several comments and add a note about optimisation showing that manual optimisations are not always good. Signed-off-by: Angel Pons --- 6_Example_PackingProblem/seqvsparallel.cpp | 117 ++++++++++++++------- 1 file changed, 77 insertions(+), 40 deletions(-) diff --git a/6_Example_PackingProblem/seqvsparallel.cpp b/6_Example_PackingProblem/seqvsparallel.cpp index 92c35bf..d8c333a 100644 --- a/6_Example_PackingProblem/seqvsparallel.cpp +++ b/6_Example_PackingProblem/seqvsparallel.cpp @@ -12,90 +12,127 @@ using namespace std; using namespace oneapi; using namespace tbb; -template +/** + * This allows selecting between two different versions of the inner + * loop function in the parallel scan implementation. This optimised + * version is more verbose, but avoids evaluating the same condition + * each and every loop iteration. + * + * When not using compiler optimisations, the unoptimised version is + * much worse (about +33% exec time for ARRAY_SIZE = 100000000). But + * with \c -O3 optimisations both approaches perform about the same, + * i.e. the compiler is smart enough to optimise this if allowed to. + * + * This shows that manual optimisations (which often make the source + * code harder to read and maintain) may be useless when things like + * the compiler can automatically perform the same optimisations. So + * consider profiling / benchmarking before wasting time optimising.
+ */ +#define USE_OPTIMISED_LOOP 0 + +template class Body { - T reduced_result; + T sum; T* const y; const T* const x; public: - Body(T y_[], const T x_[]) : reduced_result(0), x(x_), y(y_) {} + Body(T y_[], const T x_[]) : sum(0), x(x_), y(y_) {} - T get_reduced_result() const { return reduced_result; } + T get_sum() const { return sum; } template void operator()(const blocked_range& r, Tag) { - T temp = reduced_result; - +#if USE_OPTIMISED_LOOP + /** + * Evaluate \c Tag::is_final_scan() once outside the + * loop, but we need to have two separate loops. + */ + if (Tag::is_final_scan()) { + for (int i = r.begin(); i < r.end(); ++i) { + sum = sum + x[i]; + y[i] = sum; + } + } else { + for (int i = r.begin(); i < r.end(); ++i) { + sum = sum + x[i]; + } + } +#else + /** + * Less verbose, but \c Tag::is_final_scan() is + * evaluated each and every iteration. Or is it? + */ for (int i = r.begin(); i < r.end(); ++i) { - temp = temp + x[i]; + sum = sum + x[i]; if (Tag::is_final_scan()) { - y[i] = temp; + y[i] = sum; } } - - reduced_result = temp; +#endif } - Body(Body& b, split) : x(b.x), y(b.y), reduced_result(10) {} + Body(Body& b, split) : x(b.x), y(b.y), sum(0) {} void reverse_join(Body& a) { - reduced_result = a.reduced_result + reduced_result; + sum = a.sum + sum; } void assign(Body& b) { - reduced_result = b.reduced_result; + sum = b.sum; } }; - -template -float DoParallelScan(T y[], const T x[], int n) +template +T DoParallelScan(T y[], const T x[], int n) { - Body body(y, x); - tick_count t1, t2, t3, t4; - t1 = tick_count::now(); - parallel_scan(blocked_range(0, n), body, auto_partitioner()); - t2 = tick_count::now(); - cout << "Time Taken for parallel scan is \t" << (t2 - t1).seconds() << endl; - return body.get_reduced_result(); + Body body(y, x); + const tick_count t0 = tick_count::now(); + parallel_scan(blocked_range(0, n), body); + const tick_count t1 = tick_count::now(); + cout << "Time Taken for parallel scan is: " << (t1 - t0).seconds() << endl; + return 
body.get_sum(); } - -template -float SerialScan(T1 y[], const T1 x[], int n) +template +T DoSerialScan(T y[], const T x[], int n) { - tick_count t3, t4; - - t3 = tick_count::now(); - T1 temp = 10; - - for (int i = 1; i < n; ++i) { + const tick_count t0 = tick_count::now(); + T temp = 0; + for (int i = 0; i < n; ++i) { temp = temp + x[i]; y[i] = temp; } - t4 = tick_count::now(); - cout << "Time Taken for serial scan is \t" << (t4 - t3).seconds() << endl; + const tick_count t1 = tick_count::now(); + cout << "Time Taken for serial scan is: " << (t1 - t0).seconds() << endl; return temp; } +/** + * The size of the input and output arrays used to perform the serial + * and parallel scan operations. If the arrays do not fit in RAM then + * performance will be awful and/or the program may get killed. + */ +static const int ARRAY_SIZE = 1000000; + int main() { - int y1[100000], x1[100000]; + /* For some reason, using very large C-style arrays causes segfaults in the loop */ + std::vector y1(ARRAY_SIZE), x1(ARRAY_SIZE); - for (int i = 0; i < 100000; i++) { + for (int i = 0; i < ARRAY_SIZE; i++) { x1[i] = i; } - cout << fixed; - - cout << "\n serial scan output is \t" << SerialScan(y1, x1, 100000) << endl; + const int outSerial = DoSerialScan(y1.data(), x1.data(), ARRAY_SIZE); + const int outParallel = DoParallelScan(y1.data(), x1.data(), ARRAY_SIZE); - cout << "\n parallel scan output is \t" << DoParallelScan(y1, x1, 100000) << endl; + cout << " serial scan output is \t" << outSerial << endl; + cout << "parallel scan output is \t" << outParallel << endl; return 0; } From 42c39bac86f9f23614df3861b9bc6b02282a67b9 Mon Sep 17 00:00:00 2001 From: Angel Pons Date: Tue, 16 Dec 2025 17:09:00 +0100 Subject: [PATCH 3/6] 6_Example_PackingProblem: Use `size_t` Array lengths are best expressed using `size_t` instead of `int`.
Signed-off-by: Angel Pons --- 6_Example_PackingProblem/main.cpp | 20 ++++++++++---------- 6_Example_PackingProblem/seqvsparallel.cpp | 22 +++++++++++----------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/6_Example_PackingProblem/main.cpp b/6_Example_PackingProblem/main.cpp index 81efde2..6033283 100644 --- a/6_Example_PackingProblem/main.cpp +++ b/6_Example_PackingProblem/main.cpp @@ -14,11 +14,11 @@ vector doMAP(int a, int x[], int n) vector out(n); tbb::parallel_for( - tbb::blocked_range(0, n), + tbb::blocked_range(0, n), // lambda function - [&](tbb::blocked_range r) { - for (auto i = r.begin(); i != r.end(); i++) { + [&](tbb::blocked_range r) { + for (size_t i = r.begin(); i != r.end(); i++) { out[i] = x[i] >= a; } } @@ -26,14 +26,14 @@ vector doMAP(int a, int x[], int n) return out; } -int doSCAN(int out[], const int in[], int n) +int doSCAN(int out[], const int in[], size_t n) { int total_sum = tbb::parallel_scan( - tbb::blocked_range(0, n), // range + tbb::blocked_range(0, n), // range 0, // id - [&](tbb::blocked_range r, int sum, bool is_final_scan) { + [&](tbb::blocked_range r, int sum, bool is_final_scan) { int tmp = sum; - for (int i = r.begin(); i < r.end(); ++i) { + for (size_t i = r.begin(); i < r.end(); ++i) { tmp = tmp + in[i]; if (is_final_scan) { out[i] = tmp; @@ -49,10 +49,10 @@ int doSCAN(int out[], const int in[], int n) } // doMAPFilter(&out[0],&ix[0],&x[0],&filter_results[0], x.size()) -void doMAPFilter(int bolMatch[], int ixMatch[], int x[], int out[], int n) +void doMAPFilter(int bolMatch[], int ixMatch[], int x[], int out[], size_t n) { tbb::parallel_for( - tbb::blocked_range(0, n), + tbb::blocked_range(0, n), // lambda function [&](tbb::blocked_range r) { for (auto i = r.begin(); i < r.end(); i++) { @@ -66,7 +66,7 @@ void doMAPFilter(int bolMatch[], int ixMatch[], int x[], int out[], int n) int main() { - static int a = 10; + static size_t a = 10; static vector x{7, 1, 0, 13, 0, 15, 20, -1}; tbb::tick_count t0 = 
tbb::tick_count::now(); diff --git a/6_Example_PackingProblem/seqvsparallel.cpp b/6_Example_PackingProblem/seqvsparallel.cpp index d8c333a..52d0d2e 100644 --- a/6_Example_PackingProblem/seqvsparallel.cpp +++ b/6_Example_PackingProblem/seqvsparallel.cpp @@ -43,7 +43,7 @@ class Body T get_sum() const { return sum; } template - void operator()(const blocked_range& r, Tag) + void operator()(const blocked_range& r, Tag) { #if USE_OPTIMISED_LOOP /** @@ -51,12 +51,12 @@ class Body * loop, but we need to have two separate loops. */ if (Tag::is_final_scan()) { - for (int i = r.begin(); i < r.end(); ++i) { + for (size_t i = r.begin(); i < r.end(); ++i) { sum = sum + x[i]; y[i] = sum; } } else { - for (int i = r.begin(); i < r.end(); ++i) { + for (size_t i = r.begin(); i < r.end(); ++i) { sum = sum + x[i]; } } @@ -65,7 +65,7 @@ class Body * Less verbose, but \c Tag::is_final_scan() is * evaluated each and every iteration. Or is it? */ - for (int i = r.begin(); i < r.end(); ++i) { + for (size_t i = r.begin(); i < r.end(); ++i) { sum = sum + x[i]; if (Tag::is_final_scan()) { y[i] = sum; @@ -88,22 +88,22 @@ class Body }; template -T DoParallelScan(T y[], const T x[], int n) +T DoParallelScan(T y[], const T x[], size_t n) { Body body(y, x); const tick_count t0 = tick_count::now(); - parallel_scan(blocked_range(0, n), body); + parallel_scan(blocked_range(0, n), body); const tick_count t1 = tick_count::now(); cout << "Time Taken for parallel scan is: " << (t1 - t0).seconds() << endl; return body.get_sum(); } template -T DoSerialScan(T y[], const T x[], int n) +T DoSerialScan(T y[], const T x[], size_t n) { const tick_count t0 = tick_count::now(); T temp = 0; - for (int i = 0; i < n; ++i) { + for (size_t i = 0; i < n; ++i) { temp = temp + x[i]; y[i] = temp; } @@ -117,15 +117,15 @@ T DoSerialScan(T y[], const T x[], int n) * and parallel scan operations. If the arrays do not fit in RAM then * performance will be awful and/or the program may get killed. 
*/ -static const int ARRAY_SIZE = 1000000; +static const size_t ARRAY_SIZE = 1000000; int main() { /* For some reason, using very large C-style arrays causes segfaults in the loop */ std::vector y1(ARRAY_SIZE), x1(ARRAY_SIZE); - for (int i = 0; i < ARRAY_SIZE; i++) { - x1[i] = i; + for (size_t i = 0; i < ARRAY_SIZE; i++) { + x1[i] = static_cast(i & 0x7fffffff); } const int outSerial = DoSerialScan(y1.data(), x1.data(), ARRAY_SIZE); From 63b5c47fc4b8fd6316ddaf7d39f9b47b2f10705f Mon Sep 17 00:00:00 2001 From: Angel Pons Date: Tue, 20 Jan 2026 10:48:43 +0100 Subject: [PATCH 4/6] 6_Example_PackingProblem: Overhaul with templates Templatise the map, scan and filter functions so that they can be used with any compatible type, and introduce a few more templatised helpers to (hopefully) make the code more elegant. This also overhauls printing vectors so that columns are aligned. This makes it easier to correlate values from various vectors. Finally, add several comments to explain how the program works. Signed-off-by: Angel Pons --- 6_Example_PackingProblem/main.cpp | 228 +++++++++++++++++++++++------- 1 file changed, 177 insertions(+), 51 deletions(-) diff --git a/6_Example_PackingProblem/main.cpp b/6_Example_PackingProblem/main.cpp index 6033283..68fcb0d 100644 --- a/6_Example_PackingProblem/main.cpp +++ b/6_Example_PackingProblem/main.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -9,97 +10,222 @@ using namespace std; using namespace oneapi; -vector doMAP(int a, int x[], int n) +/** + * Perform a parallel map on the elements of an input array and store + * them in a caller-provided output array. Should work with any other + * "array-like" data type that supports the subscript operator `[]`. 
+ * + * If the template parameters seem confusing, imagine that: + * - \c OutArrT is \c OutT[] + * - \c InArrT is \c InT[] + * - \c FuncT is \c OutT(InT) + * + * NOTE: Thanks to TAD (Template Argument Deduction), we do not need to + * specify any template parameters for this function template. + * + * @param out Output array + * @param in Input array + * @param func Map function + * @param n Length of the arrays + */ +template +void doMap(OutArrT& out, const InArrT& in, FuncT&& func, size_t n) { - vector out(n); - tbb::parallel_for( tbb::blocked_range(0, n), // lambda function [&](tbb::blocked_range r) { for (size_t i = r.begin(); i != r.end(); i++) { - out[i] = x[i] >= a; + out[i] = invoke(func, in[i]); } } ); - return out; } -int doSCAN(int out[], const int in[], size_t n) +/** + * Calculate a parallel prefix/scan of an input array and store the + * individual results in an output array. Should work with any other + * "array-like" data type that supports the subscript operator `[]`. + * + * The TBB function for a parallel scan takes two functions: one to + * perform a sequential scan over a range, and another to combine two + * summaries. This abstraction defines the former function from the + * latter, so the caller only needs to provide the combiner function. + * + * If the template parameters seem confusing, imagine that: + * - \c ArrT is \c T[] + * - \c InArrT is \c InT[] + * - \c FuncT is \c T(T, T) + * + * The only reason why \c InArrT and \c ArrT are different types is to + * enable implicit casting of input values to the output type without + * having to do a separate map step. For example, implicitly casting + * booleans in an array to \c size_t when computing the prefix sum. + * + * NOTE: Thanks to TAD (Template Argument Deduction), we do not need to + * specify any template parameters for this function template. 
+ * + * @param out Output array + * @param in Input array + * @param ident The identity element for the function + * @param func Combiner function + * @param n Length of the arrays + * + * @return The summary computed over the whole range + */ +template +T doScan(ArrT &out, const InArrT in, const T ident, FuncT&& func, size_t n) { - int total_sum = tbb::parallel_scan( + return tbb::parallel_scan( tbb::blocked_range(0, n), // range - 0, // id - [&](tbb::blocked_range r, int sum, bool is_final_scan) { - int tmp = sum; - for (size_t i = r.begin(); i < r.end(); ++i) { - tmp = tmp + in[i]; + ident, + [&](tbb::blocked_range r, T sum, bool is_final_scan) { + T tmp = sum; + for (size_t i = r.begin(); i != r.end(); i++) { + tmp = invoke(func, tmp, in[i]); if (is_final_scan) { out[i] = tmp; } } return tmp; }, - [&](int left, int right) { - return left + right; - } + //[&](T left, T right) { + // return invoke(func, left, right); + //} + func /* No lambda needed, just use the provided function directly */ ); - return total_sum; } -// doMAPFilter(&out[0],&ix[0],&x[0],&filter_results[0], x.size()) -void doMAPFilter(int bolMatch[], int ixMatch[], int x[], int out[], size_t n) +/** + * Filter the elements of an input array according to a boolean match + * array and store them in an output array at the position specified + * by the value in an index match array. Should work with any other + * "array-like" data type that supports the subscript operator `[]`. + * + * Note that a proper filter function would only take the input array + * and a \c bool(T) predicate function. However, this program is meant + * to show how a parallel filter function can be implemented, so the + * signature of this function takes the intermediate results that were + * previously computed. + * + * NOTE: Thanks to TAD (Template Argument Deduction), we do not need to + * specify any template parameters for this function template. 
+ * + * @param out Output array + * @param in Input array + * @param bolMatch Array of bool-like (result of mapping a predicate on \p in array) + * @param ixMatch Array of indices plus 1 (result of prefix sum over \p bolMatch array) + * @param n Length of the input arrays (output array can be shorter) + */ +template +void doFilter(ArrT& out, const ArrT& in, const BoolArrT& bolMatch, const IdxArrT& ixMatch, size_t n) { tbb::parallel_for( tbb::blocked_range(0, n), - // lambda function - [&](tbb::blocked_range r) { - for (auto i = r.begin(); i < r.end(); i++) { + [&](tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); i++) { if (bolMatch[i]) { - out[ixMatch[i] - 1] = x[i]; + out[ixMatch[i] - 1] = in[i]; } } } ); } -int main() +/** + * Print the length and contents of a vector. Assumes elements can be + * printed using the \c << operator of \c cout directly. Attempts to + * pad values using \c setw() so that they remain aligned. + * + * Will also pad to ensure the + * + * @param vec The vector to print the info of + * @param name The name to show for this vector + */ +template +void printVec(const vector& vec, const string& name) { - static size_t a = 10; - static vector x{7, 1, 0, 13, 0, 15, 20, -1}; - - tbb::tick_count t0 = tbb::tick_count::now(); - - // MAP operation - vector bolMatch = doMAP(a,&x[0], x.size()); - - cout << "Map vector: "<< endl; - for (int i : bolMatch) { - cout << i << ','; + const string prefix = name + " [" + to_string(vec.size()) + "]:"; + cout << setw(16) << prefix; + for (const T& e : vec) { + cout << setw(4) << e << ","; } cout << endl; +} - // SCAN - vector ixMatch(x.size()); - int sum = doSCAN(&ixMatch[0], &bolMatch[0], x.size()); // get index order +/** + * Perform a parallel filter operation on an input vector. The output + * vector only contains the elements of the input vector for which the + * predicate returned true, preserving the order of the input elements. 
+ * + * We use vectors here for convenience, since we need to allocate both + * intermediate and final results. + * + * If the template parameters seem confusing, imagine that: + * - \c PredT is \c bool(T) + * + * @param inputVec Input vector + * @param predicate Function to test each element with + * + * @return A vector with the filtered elements from the input + */ +template +vector vecFilter(const vector& inputVec, PredT&& predicate) +{ + const size_t n = inputVec.size(); + printVec(inputVec, "inputVec"); + + /** + * MAP: Apply the predicate to each element of the input vector. + * We obtain a \c bolMatch vector where each boolean value is + * the value returned by the predicate for a given input value. + */ + vector bolMatch(n); + doMap(bolMatch, inputVec, predicate, n); + printVec(bolMatch, "bolMatch"); + + /** + * SCAN: Compute the prefix sum of the \c bolMatch vector. The + * resulting \c ixMatch vector tells us the index in the output + * vector where a given input value needs to be stored (plus 1). + */ + const auto scanFunc = [](size_t a, size_t b) { return a + b; }; + const size_t identity = 0; + vector ixMatch(n); + /** + * We can calculate the output length (i.e. the number of values + * that passed the predicate) using a REDUCE operation. However, + * what \c doScan returns (the summary computed over the entire + * range) is exactly the same a REDUCE operation would return. + */ + size_t outSize = doScan(ixMatch, bolMatch, identity, scanFunc, n); + printVec(ixMatch, "ixMatch"); + + /** + * JOIN: Using \c bolMatch and \c ixMatch from previous steps, + * copy the input elements that passed the predicate (i.e. for + * which \c bolMatch is true) to the output vector, making use + * of \c ixMatch to know in which position the elements should + * be inserted. 
+ */ + vector filteredVec(outSize); + doFilter(filteredVec, inputVec, bolMatch, ixMatch, n); + printVec(filteredVec, "filteredVec"); + + return filteredVec; +} - cout << "Scan vector: " << sum << endl; - for (int i : ixMatch) { - cout << i << ','; - } - cout << endl; +int main() +{ + const vector input{7, 1, 0, 13, 0, 15, 20, -1}; + const auto predicate = [](int x) { return x > 10; }; - // JOIN - vector filtered_results(sum); - doMAPFilter(&bolMatch[0], &ixMatch[0], &x[0], &filtered_results[0], x.size()); + tbb::tick_count t0 = tbb::tick_count::now(); - cout << "Filtered vector: " << endl; - for (int i : filtered_results) { - cout << i << ','; - } - cout << endl; + const vector output = vecFilter(input, predicate); - cout << "\nTime: " << (tbb::tick_count::now() - t0).seconds() << "seconds" << endl; + /* NOTE: this includes the time spent printing the contents of vectors */ + cout << "\nTime: " << (tbb::tick_count::now() - t0).seconds() << " seconds" << endl; return 0; } From 30b68d6e51caf330ef75bdb3be141decddef107c Mon Sep 17 00:00:00 2001 From: Angel Pons Date: Tue, 20 Jan 2026 17:57:53 +0100 Subject: [PATCH 5/6] 6_Example_PackingProblem: Drop leftover comment sentence Signed-off-by: Angel Pons --- 6_Example_PackingProblem/main.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/6_Example_PackingProblem/main.cpp b/6_Example_PackingProblem/main.cpp index 68fcb0d..82050b6 100644 --- a/6_Example_PackingProblem/main.cpp +++ b/6_Example_PackingProblem/main.cpp @@ -138,8 +138,6 @@ void doFilter(ArrT& out, const ArrT& in, const BoolArrT& bolMatch, const IdxArrT * printed using the \c << operator of \c cout directly. Attempts to * pad values using \c setw() so that they remain aligned. 
 * - * Will also pad to ensure the - * * @param vec The vector to print the info of * @param name The name to show for this vector */ From b6f4a245135a1a247175f96d96f6d7fdd2cec8e7 Mon Sep 17 00:00:00 2001 From: Angel Pons Date: Tue, 20 Jan 2026 18:52:09 +0100 Subject: [PATCH 6/6] 6_Example_PackingProblem: Pass some inputs as references Signed-off-by: Angel Pons --- 6_Example_PackingProblem/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/6_Example_PackingProblem/main.cpp b/6_Example_PackingProblem/main.cpp index 82050b6..22956bf 100644 --- a/6_Example_PackingProblem/main.cpp +++ b/6_Example_PackingProblem/main.cpp @@ -75,7 +75,7 @@ void doMap(OutArrT& out, const InArrT& in, FuncT&& func, size_t n) * @return The summary computed over the whole range */ template -T doScan(ArrT &out, const InArrT in, const T ident, FuncT&& func, size_t n) +T doScan(ArrT &out, const InArrT& in, const T& ident, FuncT&& func, size_t n) { return tbb::parallel_scan( tbb::blocked_range(0, n), // range