// 2019/02/20 - modified by Tsung-Wei Huang
// - added empty_subflow benchmarks
// - added steady_subflow benchmarks
//
// 2018/12/04 - modified by Tsung-Wei Huang
// - replaced privatized threadpool with work stealing threadpool
//
// 2018/10/24 - modified by Tsung-Wei Huang
// - Taskflow is templated at threadpool
// - added graph-level comparison with different thread pools
//
// 2018/09/19 - created by Tsung-Wei Huang
//
// This program is used to benchmark the taskflow under different types
// of workloads.
#include
#include
#include
#include
#include
constexpr int WIDTH = 15;
// Procedure: benchmark
#define BENCHMARK(TITLE, F) \
std::cout \
<< std::setw(WIDTH) << TITLE << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::setw(WIDTH) << F>() << std::flush \
<< std::endl;
// ============================================================================
// Dynamic Stem
// ============================================================================
// Function: dynamic_stem
template
auto dynamic_stem() {
auto beg = std::chrono::high_resolution_clock::now();
{
const int L = 1024;
std::atomic sum {0};
T tf;
std::optional prev;
for(int l=0; l p;
for(int k=0; kprecede(c);
}
p = c;
}
if(l & 1) {
subflow.detach();
}
});
if(prev) {
prev->precede(curr);
}
prev = curr;
}
tf.wait_for_all();
assert(sum == L*(L+1));
}
auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast(end - beg).count();
}
// ============================================================================
// Map-Reduce
// ============================================================================
// Function: map_reduce
template
auto map_reduce() {
auto beg = std::chrono::high_resolution_clock::now();
{
const int num_batches = 65536;
std::vector C(1024, 10);
std::atomic sum {0};
T tf;
std::optional prev;
for(int i=0; iprecede(s);
}
prev = t;
}
tf.wait_for_all();
assert(sum == num_batches * C.size() * 10);
}
auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast(end - beg).count();
}
// ============================================================================
// Level Graph
// ============================================================================
// Function: level_graph
template
auto level_graph() {
const int num_levels = 2048;
const int num_nodes_per_level = 1024;
auto beg = std::chrono::high_resolution_clock::now();
{
std::atomic sum {0};
T tf;
std::vector< std::vector > tasks;
tasks.resize(num_levels);
for(int l=0; l(end - beg).count();
}
// ============================================================================
// Linear Graph
// ============================================================================
// Function: linear_graph
template
auto linear_graph() {
const int num_nodes = 1000000;
auto beg = std::chrono::high_resolution_clock::now();
{
size_t sum {0};
T tf;
std::vector tasks;
for(int i=0; i(end - beg).count();
}
// ============================================================================
// Binary Tree
// ============================================================================
// Function: binary_tree
template
auto binary_tree() {
const int num_levels = 21;
auto beg = std::chrono::high_resolution_clock::now();
{
T tf;
std::atomic sum {0};
std::function insert;
insert = [&] (int l, tf::Task parent) {
if(l < num_levels) {
auto lc = tf.emplace([&] () {
sum.fetch_add(1, std::memory_order_relaxed);
});
auto rc = tf.emplace([&] () {
sum.fetch_add(1, std::memory_order_relaxed);
});
parent.precede(lc);
parent.precede(rc);
insert(l+1, lc);
insert(l+1, rc);
}
};
auto root = tf.emplace([&] () {
sum.fetch_add(1, std::memory_order_relaxed);
});
insert(1, root);
// synchronize until all tasks finish
tf.wait_for_all();
assert(sum == (1 << (num_levels)) - 1);
}
auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast(end - beg).count();
}
// ============================================================================
// Empty Jobs
// ============================================================================
// Function: empty_jobs
template
auto empty_jobs() {
const int num_tasks = 2000000;
auto beg = std::chrono::high_resolution_clock::now();
{
T tf;
for(size_t i=0; i(end - beg).count();
}
// ============================================================================
// Atomic add
// ============================================================================
// Function: atomic_add
template
auto atomic_add() {
const int num_tasks = 1000000;
auto beg = std::chrono::high_resolution_clock::now();
{
std::atomic counter(0);
T tf;
for(size_t i=0; i(end - beg).count();
}
// ============================================================================
// Multiple Dispatches
// ============================================================================
// Function: multiple_dispatches
template
auto multiple_dispatches() {
auto create_graph = [] (T& tf, size_t N, std::atomic& c) {
for(size_t i=0; i counter(0);
create_graph(tf, p, counter);
tf.wait_for_all();
assert(counter == p * 4);
}
}
auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast(end - beg).count();
}
// ============================================================================
// Nested subflow
// ============================================================================
// Function: empty_subflow
template
auto empty_subflow() {
std::function grow;
grow = [&grow] (tf::SubflowBuilder& subflow, uint64_t depth) {
if(depth < 20) {
subflow.emplace(
[depth, &grow](tf::SubflowBuilder& subsubflow){ grow(subsubflow, depth+1); },
[depth, &grow](tf::SubflowBuilder& subsubflow){ grow(subsubflow, depth+1); });
subflow.detach();
}
};
auto beg = std::chrono::high_resolution_clock::now();
{
T tf;
tf.emplace([&] (tf::SubflowBuilder& subflow) { grow(subflow, 0); });
tf.wait_for_all();
}
auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast(end - beg).count();
}
// Function: steady_subflow
template
auto steady_subflow() {
std::function grow;
grow = [&grow] (tf::SubflowBuilder& subflow, uint64_t depth) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
if(depth < 3) {
subflow.emplace(
[depth, &grow](tf::SubflowBuilder& subsubflow){ grow(subsubflow, depth+1); },
[depth, &grow](tf::SubflowBuilder& subsubflow){ grow(subsubflow, depth+1); });
subflow.detach();
}
};
auto beg = std::chrono::high_resolution_clock::now();
{
T tf;
tf.emplace([&] (tf::SubflowBuilder& subflow) { grow(subflow, 0); });
tf.wait_for_all();
}
auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast(end - beg).count();
}
// ----------------------------------------------------------------------------
// Function: main
int main(int argc, char* argv[]) {
std::cout << std::setw(WIDTH) << "workload"
<< std::setw(WIDTH) << "tf+simple"
<< std::setw(WIDTH) << "tf+pro"
<< std::setw(WIDTH) << "tf+spec"
<< std::setw(WIDTH) << "tf+steal"
<< std::setw(WIDTH) << "tf+eigen"
<< std::endl;
BENCHMARK("map-reduce", map_reduce);
BENCHMARK("empty jobs", empty_jobs);
BENCHMARK("atomic add", atomic_add);
BENCHMARK("dispatches", multiple_dispatches);
BENCHMARK("b-tree", binary_tree);
BENCHMARK("linear", linear_graph);
BENCHMARK("dag", level_graph);
BENCHMARK("dynamic", dynamic_stem);
BENCHMARK("empty-subflow", empty_subflow);
BENCHMARK("steady-subflow", steady_subflow);
return 0;
}