X Tutup

// 2018/10/04 modified by Tsung-Wei Huang // - removed binary_tree // - removed modulo_insertions // - adopted to the new threadpool implementation // // 2018/09/19 modified by Tsung-Wei Huang // - added binary_tree benchmark // - added modulo_insertions benchmark // - refactored benchmark calls // // 2018/08/31 contributed by Guannan // // Examples to test different threadpool implementations: // - SimpleThreadpool // - ProactiveThreadpool // - SpeculativeThreadpool // - PrivatizedThreadpool #include #include #include #include #include // Procedure: benchmark #define BENCHMARK(TITLE, F) \ std::cout << "========== " << TITLE << " ==========\n"; \ \ std::cout << "Threadpool [simple ] elapsed time: " \ << F>>() << " ms\n"; \ \ std::cout << "Threadpool [proactive ] elapsed time: " \ << F>>() << " ms\n"; \ \ std::cout << "Threadpool [speculative] elapsed time: " \ << F>>() << " ms\n"; \ \ std::cout << "Threadpool [privatized ] elapsed time: " \ << F>>() << " ms\n"; \ // ============================================================================ // Divide and conquer to solve max subarray sum problem // https://www.geeksforgeeks.org/divide-and-conquer-maximum-sum-subarray/ // ============================================================================ constexpr auto tree_height = 20u; constexpr auto total_nodes = 1u << tree_height; void update_max(std::atomic& max_val, const int value) { int old = max_val; while(old < value && !max_val.compare_exchange_weak(old, value)); } int max_cross_sum(const std::vector& vec, int l, int m, int r){ // Include elements on left of mid. auto sum = 0; auto left_sum = INT_MIN; for (auto i = m; i >= l; i--){ sum = sum + vec[i]; if (sum > left_sum) left_sum = sum; } // Include elements on right of mid sum = 0; auto right_sum = INT_MIN; for (auto i = m+1; i <= r; i++) { sum = sum + vec[i]; if (sum > right_sum) right_sum = sum; } // Return sum of elements on left and right of mid return left_sum + right_sum; } template void max_subsum( const std::vector& vec, int l, int r, std::atomic& max_num, T& tp, std::atomic& counter, std::promise& promise ) { // Base Case: Only one element if (l == r) { update_max(max_num, vec[l]); if(++counter == total_nodes*2-1){ promise.set_value(); } return ; } // Find middle point int m = (l + r)/2; tp.emplace([&, l=l, m=m] () { max_subsum(vec, l, m, max_num, tp, counter, promise); }); tp.emplace([&, m=m, r=r] () { max_subsum(vec, m+1, r, max_num, tp, counter, promise); }); update_max(max_num, max_cross_sum(vec, l, m, r)); if(++counter == total_nodes*2-1){ promise.set_value(); } } template auto subsum(){ std::vector vec(total_nodes); std::iota(vec.begin(), vec.end(), -50); std::atomic result {INT_MIN}; std::atomic counter{0}; std::promise promise; auto future = promise.get_future(); auto start = std::chrono::high_resolution_clock::now(); T tp(std::thread::hardware_concurrency()); max_subsum(vec, 0, total_nodes-1, result, tp, counter, promise); future.get(); auto end = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); return elapsed.count(); } // ============================================================================ // Dynamic tasking through linear insertions // ============================================================================ // Procedure: linear_insertions template auto linear_insertions() { const int num_threads = std::thread::hardware_concurrency(); const int num_tasks = 2000000; auto beg = std::chrono::high_resolution_clock::now(); T threadpool(num_threads); std::atomic sum {0}; std::function insert; std::promise promise; auto future = promise.get_future(); insert = [&threadpool, &insert, &sum, &promise] (int i) { if(i > 0) { threadpool.emplace([i=i-1, &insert] () { insert(i); }); } else { if(size_t s = ++sum; s == threadpool.num_workers()) { promise.set_value(1); } } }; for(int i=0; i(end - beg).count(); } // ============================================================================ // Insertions with atomic summation // ============================================================================ // Function: atomic_add template auto atomic_add() { const int num_threads = std::thread::hardware_concurrency(); const int num_tasks = 1000000; std::atomic counter(0); auto beg = std::chrono::high_resolution_clock::now(); std::promise promise; auto future = promise.get_future(); T threadpool(num_threads); for(size_t i=0; i(end - beg).count(); } // Function: main int main(int argc, char* argv[]) { BENCHMARK("Atomic Add", atomic_add); BENCHMARK("Linear Insertions", linear_insertions); BENCHMARK("Divide and Conquer", subsum); return 0; }

X Tutup