// 2019/03/29 modified by Tsung-Wei Huang
// - added bounded workstealing
//
// 2019/02/15 modified by Tsung-Wei Huang
// - modified batch_insertion
//
// 2018/12/08 modified by Tsung-Wei Huang
// - refactored the output format
//
// 2018/12/07 modified by Tsung-Wei Huang
// - refactored the output format
//
// 2018/12/06 modified by Tsung-Wei Huang
// - added nested insertions test
//
// 2018/12/04 modified by Tsung-Wei Huang
// - replace privatized threadpool with work stealing threadpool
//
// 2018/10/04 modified by Tsung-Wei Huang
// - removed binary_tree
// - removed modulo_insertions
// - adopted to the new threadpool implementation
//
// 2018/09/19 modified by Tsung-Wei Huang
// - added binary_tree benchmark
// - added modulo_insertions benchmark
// - refactored benchmark calls
//
// 2018/08/31 contributed by Guannan
//
// Examples to test different threadpool implementations:
// - SimpleThreadpool
// - ProactiveThreadpool
// - SpeculativeThreadpool
// - PrivatizedThreadpool
#include
#include
#include
#include
#include
#include
#include
constexpr int WIDTH = 12;
using Closure = std::function;
// Procedure: benchmark
#define BENCHMARK(TITLE, F) \
std::cout \
<< std::setw(WIDTH) << TITLE << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::endl;
// ============================================================================
// Divide and conquer to solve max subarray sum problem
// https://www.geeksforgeeks.org/divide-and-conquer-maximum-sum-subarray/
// ============================================================================
constexpr auto tree_height = 20u;
constexpr auto total_nodes = 1u << tree_height;
void update_max(std::atomic& max_val, const int value) {
int old = max_val;
while(old < value && !max_val.compare_exchange_weak(old, value));
}
int max_cross_sum(const std::vector& vec, int l, int m, int r){
// Include elements on left of mid.
auto sum = 0;
auto left_sum = INT_MIN;
for (auto i = m; i >= l; i--){
sum = sum + vec[i];
if (sum > left_sum)
left_sum = sum;
}
// Include elements on right of mid
sum = 0;
auto right_sum = INT_MIN;
for (auto i = m+1; i <= r; i++)
{
sum = sum + vec[i];
if (sum > right_sum)
right_sum = sum;
}
// Return sum of elements on left and right of mid
return left_sum + right_sum;
}
template
void max_subsum(
const std::vector& vec,
int l, int r,
std::atomic& max_num,
T& tp,
std::atomic& counter,
std::promise& promise
) {
// Base Case: Only one element
if (l == r) {
update_max(max_num, vec[l]);
if(++counter == total_nodes*2-1){
promise.set_value();
}
return ;
}
// Find middle point
int m = (l + r)/2;
tp.emplace([&, l=l, m=m] () {
max_subsum(vec, l, m, max_num, tp, counter, promise);
});
tp.emplace([&, m=m, r=r] () {
max_subsum(vec, m+1, r, max_num, tp, counter, promise);
});
update_max(max_num, max_cross_sum(vec, l, m, r));
if(++counter == total_nodes*2-1){
promise.set_value();
}
}
template
auto subsum(){
std::vector vec(total_nodes);
std::iota(vec.begin(), vec.end(), -50);
std::atomic result {INT_MIN};
std::atomic counter{0};
std::promise promise;
auto future = promise.get_future();
auto start = std::chrono::high_resolution_clock::now();
T tp(std::thread::hardware_concurrency());
max_subsum(vec, 0, total_nodes-1, result, tp, counter, promise);
future.get();
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast(end - start);
return elapsed.count();
}
// ============================================================================
// Dynamic tasking through linear insertions
// ============================================================================
// Procedure: linear_insertions
template
auto linear_insertions() {
const int num_threads = std::thread::hardware_concurrency();
const int num_tasks = 2000000;
auto beg = std::chrono::high_resolution_clock::now();
T threadpool(num_threads);
std::atomic sum {0};
std::function insert;
std::promise promise;
auto future = promise.get_future();
insert = [&threadpool, &insert, &sum, &promise] (int i) {
if(i > 0) {
threadpool.emplace([i=i-1, &insert] () {
insert(i);
});
}
else {
if(size_t s = ++sum; s == threadpool.num_workers()) {
promise.set_value(1);
}
}
};
for(int i=0; i(end - beg).count();
}
// ============================================================================
// Insertions with atomic summation
// ============================================================================
// Function: atomic_add
template
auto atomic_add() {
const int num_threads = std::thread::hardware_concurrency();
const int num_tasks = 1000000;
std::atomic counter(0);
auto beg = std::chrono::high_resolution_clock::now();
std::promise promise;
auto future = promise.get_future();
T threadpool(num_threads);
for(size_t i=0; i(end - beg).count();
}
// ============================================================================
// skewed insertions
// ============================================================================
// Function: nested_insertions
template
auto nested_insertions() {
const int num_threads = std::thread::hardware_concurrency();
const int num_tasks = 32;
auto beg = std::chrono::high_resolution_clock::now();
std::atomic counter(0);
std::promise promise;
auto future = promise.get_future();
auto increment = [&] () {
int64_t sum = 0;
for(int i=0; i<5; ++i) {
sum = (sum + 1)*num_tasks;
}
if(++counter == sum) {
promise.set_value();
}
};
T threadpool(num_threads);
threadpool.emplace([&] () {
std::this_thread::sleep_for(std::chrono::microseconds(100));
for(int i=0; i(end - beg).count();
}
// ============================================================================
// batch insertion
// ============================================================================
// Function: batch_insertions
template
auto batch_insertions() {
const int num_threads = std::thread::hardware_concurrency();
const int num_batches = 512;
const int num_tasks = 512;
auto beg = std::chrono::high_resolution_clock::now();
std::atomic counter(0);
std::vector> tasks (num_tasks);
std::promise promise;
auto future = promise.get_future();
for(auto & task : tasks) {
task = [&] () {
if(++counter == 2*num_tasks*num_batches) {
promise.set_value();
}
};
}
T threadpool(num_threads);
// master to insert a batch
for(size_t i=0; i(end - beg).count();
}
// Function: main
int main(int argc, char* argv[]) {
std::cout << std::setw(WIDTH) << "workload"
<< std::setw(WIDTH) << "simple"
<< std::setw(WIDTH) << "pro"
<< std::setw(WIDTH) << "spec"
<< std::setw(WIDTH) << "steal"
<< std::setw(WIDTH) << "eigen"
<< std::endl;
BENCHMARK("Atomic", atomic_add);
BENCHMARK("Linear", linear_insertions);
BENCHMARK("D&C", subsum);
BENCHMARK("Batch", batch_insertions);
BENCHMARK("Nested", nested_insertions);
return 0;
}