X Tutup
// 2019/03/29 modified by Tsung-Wei Huang // - added bounded workstealing // // 2019/02/15 modified by Tsung-Wei Huang // - modified batch_insertion // // 2018/12/08 modified by Tsung-Wei Huang // - refactored the output format // // 2018/12/07 modified by Tsung-Wei Huang // - refactored the output format // // 2018/12/06 modified by Tsung-Wei Huang // - added nested insertions test // // 2018/12/04 modified by Tsung-Wei Huang // - replace privatized threadpool with work stealing threadpool // // 2018/10/04 modified by Tsung-Wei Huang // - removed binary_tree // - removed modulo_insertions // - adopted to the new threadpool implementation // // 2018/09/19 modified by Tsung-Wei Huang // - added binary_tree benchmark // - added modulo_insertions benchmark // - refactored benchmark calls // // 2018/08/31 contributed by Guannan // // Examples to test different threadpool implementations: // - SimpleThreadpool // - ProactiveThreadpool // - SpeculativeThreadpool // - PrivatizedThreadpool #include #include #include #include #include #include #include constexpr int WIDTH = 12; using Closure = std::function; // Procedure: benchmark #define BENCHMARK(TITLE, F) \ std::cout \ << std::setw(WIDTH) << TITLE << std::flush \ << std::setw(WIDTH) << F>() << std::flush \ << std::setw(WIDTH) << F>() << std::flush \ << std::setw(WIDTH) << F>() << std::flush \ << std::setw(WIDTH) << F>() << std::flush \ << std::setw(WIDTH) << F>() << std::flush \ << std::endl; // ============================================================================ // Divide and conquer to solve max subarray sum problem // https://www.geeksforgeeks.org/divide-and-conquer-maximum-sum-subarray/ // ============================================================================ constexpr auto tree_height = 20u; constexpr auto total_nodes = 1u << tree_height; void update_max(std::atomic& max_val, const int value) { int old = max_val; while(old < value && !max_val.compare_exchange_weak(old, value)); } int max_cross_sum(const std::vector& vec, int l, int m, int r){ // Include elements on left of mid. auto sum = 0; auto left_sum = INT_MIN; for (auto i = m; i >= l; i--){ sum = sum + vec[i]; if (sum > left_sum) left_sum = sum; } // Include elements on right of mid sum = 0; auto right_sum = INT_MIN; for (auto i = m+1; i <= r; i++) { sum = sum + vec[i]; if (sum > right_sum) right_sum = sum; } // Return sum of elements on left and right of mid return left_sum + right_sum; } template void max_subsum( const std::vector& vec, int l, int r, std::atomic& max_num, T& tp, std::atomic& counter, std::promise& promise ) { // Base Case: Only one element if (l == r) { update_max(max_num, vec[l]); if(++counter == total_nodes*2-1){ promise.set_value(); } return ; } // Find middle point int m = (l + r)/2; tp.emplace([&, l=l, m=m] () { max_subsum(vec, l, m, max_num, tp, counter, promise); }); tp.emplace([&, m=m, r=r] () { max_subsum(vec, m+1, r, max_num, tp, counter, promise); }); update_max(max_num, max_cross_sum(vec, l, m, r)); if(++counter == total_nodes*2-1){ promise.set_value(); } } template auto subsum(){ std::vector vec(total_nodes); std::iota(vec.begin(), vec.end(), -50); std::atomic result {INT_MIN}; std::atomic counter{0}; std::promise promise; auto future = promise.get_future(); auto start = std::chrono::high_resolution_clock::now(); T tp(std::thread::hardware_concurrency()); max_subsum(vec, 0, total_nodes-1, result, tp, counter, promise); future.get(); auto end = std::chrono::high_resolution_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); return elapsed.count(); } // ============================================================================ // Dynamic tasking through linear insertions // ============================================================================ // Procedure: linear_insertions template auto linear_insertions() { const int num_threads = std::thread::hardware_concurrency(); const int num_tasks = 2000000; auto beg = std::chrono::high_resolution_clock::now(); T threadpool(num_threads); std::atomic sum {0}; std::function insert; std::promise promise; auto future = promise.get_future(); insert = [&threadpool, &insert, &sum, &promise] (int i) { if(i > 0) { threadpool.emplace([i=i-1, &insert] () { insert(i); }); } else { if(size_t s = ++sum; s == threadpool.num_workers()) { promise.set_value(1); } } }; for(int i=0; i(end - beg).count(); } // ============================================================================ // Insertions with atomic summation // ============================================================================ // Function: atomic_add template auto atomic_add() { const int num_threads = std::thread::hardware_concurrency(); const int num_tasks = 1000000; std::atomic counter(0); auto beg = std::chrono::high_resolution_clock::now(); std::promise promise; auto future = promise.get_future(); T threadpool(num_threads); for(size_t i=0; i(end - beg).count(); } // ============================================================================ // skewed insertions // ============================================================================ // Function: nested_insertions template auto nested_insertions() { const int num_threads = std::thread::hardware_concurrency(); const int num_tasks = 32; auto beg = std::chrono::high_resolution_clock::now(); std::atomic counter(0); std::promise promise; auto future = promise.get_future(); auto increment = [&] () { int64_t sum = 0; for(int i=0; i<5; ++i) { sum = (sum + 1)*num_tasks; } if(++counter == sum) { promise.set_value(); } }; T threadpool(num_threads); threadpool.emplace([&] () { std::this_thread::sleep_for(std::chrono::microseconds(100)); for(int i=0; i(end - beg).count(); } // ============================================================================ // batch insertion // ============================================================================ // Function: batch_insertions template auto batch_insertions() { const int num_threads = std::thread::hardware_concurrency(); const int num_batches = 512; const int num_tasks = 512; auto beg = std::chrono::high_resolution_clock::now(); std::atomic counter(0); std::vector> tasks (num_tasks); std::promise promise; auto future = promise.get_future(); for(auto & task : tasks) { task = [&] () { if(++counter == 2*num_tasks*num_batches) { promise.set_value(); } }; } T threadpool(num_threads); // master to insert a batch for(size_t i=0; i(end - beg).count(); } // Function: main int main(int argc, char* argv[]) { std::cout << std::setw(WIDTH) << "workload" << std::setw(WIDTH) << "simple" << std::setw(WIDTH) << "pro" << std::setw(WIDTH) << "spec" << std::setw(WIDTH) << "steal" << std::setw(WIDTH) << "eigen" << std::endl; BENCHMARK("Atomic", atomic_add); BENCHMARK("Linear", linear_insertions); BENCHMARK("D&C", subsum); BENCHMARK("Batch", batch_insertions); BENCHMARK("Nested", nested_insertions); return 0; }
X Tutup