X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FBenchmark.cpp;h=2e9a7ef48cae09f63c3ac0735c316f1c64fc1bcf;hb=b22d1721ed9c062e22b33936e13b39ed1507391a;hp=6c481335a63eaee06a1129873b8f3a68c865e4de;hpb=a83ba29c59e69de4c7c834960dc65bb471de9f25;p=folly.git diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp index 6c481335..2e9a7ef4 100644 --- a/folly/Benchmark.cpp +++ b/folly/Benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2016 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,23 +17,29 @@ // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) #include -#include -#include -#include #include -#include #include +#include #include #include +#include +#include #include #include -#include + +#include + +#include +#include +#include +#include using namespace std; DEFINE_bool(benchmark, false, "Run benchmarks."); DEFINE_bool(json, false, "Output in JSON format."); +DEFINE_bool(json_verbose, false, "Output in verbose JSON format."); DEFINE_string( bm_regex, @@ -52,7 +58,7 @@ DEFINE_int32( DEFINE_int64( bm_max_iters, - 1L << 30L, + 1 << 30, "Maximum # of iterations we'll try for each benchmark."); DEFINE_int32( @@ -62,13 +68,12 @@ DEFINE_int32( namespace folly { -BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent; +std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent; typedef function BenchmarkFun; - -vector>& benchmarks() { - static vector> _benchmarks; +vector& benchmarks() { + static vector _benchmarks; return _benchmarks; } @@ -87,12 +92,11 @@ BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) { size_t getGlobalBenchmarkBaselineIndex() { const char *global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE); auto it = std::find_if( - benchmarks().begin(), - benchmarks().end(), - [global](const tuple &v) { - return get<1>(v) == global; - } - ); + benchmarks().begin(), + benchmarks().end(), + [global](const detail::BenchmarkRegistration& v) { + return v.name == global; + }); CHECK(it != benchmarks().end()); return size_t(std::distance(benchmarks().begin(), it)); } @@ -102,41 +106,7 @@ size_t getGlobalBenchmarkBaselineIndex() { void detail::addBenchmarkImpl(const char* file, const char* name, BenchmarkFun fun) { - benchmarks().emplace_back(file, name, std::move(fun)); -} - -/** - * Given a point, gives density at that point as a number 0.0 < x <= - * 1.0. The result is 1.0 if all samples are equal to where, and - * decreases near 0 if all points are far away from it. The density is - * computed with the help of a radial basis function. - */ -static double density(const double * begin, const double *const end, - const double where, const double bandwidth) { - assert(begin < end); - assert(bandwidth > 0.0); - double sum = 0.0; - FOR_EACH_RANGE (i, begin, end) { - auto d = (*i - where) / bandwidth; - sum += exp(- d * d); - } - return sum / (end - begin); -} - -/** - * Computes mean and variance for a bunch of data points. Note that - * mean is currently not being used. - */ -static pair -meanVariance(const double * begin, const double *const end) { - assert(begin < end); - double sum = 0.0, sum2 = 0.0; - FOR_EACH_RANGE (i, begin, end) { - sum += *i; - sum2 += *i * *i; - } - auto const n = end - begin; - return make_pair(sum / n, sqrt((sum2 - sum * sum / n) / n)); + benchmarks().push_back({file, name, std::move(fun)}); } /** @@ -152,54 +122,52 @@ static double estimateTime(double * begin, double * end) { static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, const double globalBaseline) { + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::microseconds; + using std::chrono::nanoseconds; + using std::chrono::seconds; + // They key here is accuracy; too low numbers means the accuracy was // coarse. We up the ante until we get to at least minNanoseconds // timings. - static uint64_t resolutionInNs = 0; - if (!resolutionInNs) { - timespec ts; - CHECK_EQ(0, clock_getres(CLOCK_REALTIME, &ts)); - CHECK_EQ(0, ts.tv_sec) << "Clock sucks."; - CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good."; - CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel."; - resolutionInNs = ts.tv_nsec; - } + static_assert( + std::is_same::value, + "High resolution clock must be nanosecond resolution."); // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if // the clock resolution is worse than that, it will be larger. In // essence we're aiming at making the quantization noise 0.01%. - static const auto minNanoseconds = - max(FLAGS_bm_min_usec * 1000UL, - min(resolutionInNs * 100000, 1000000000ULL)); + static const auto minNanoseconds = std::max( + nanoseconds(100000), microseconds(FLAGS_bm_min_usec)); // We do measurements in several epochs and take the minimum, to // account for jitter. static const unsigned int epochs = 1000; // We establish a total time budget as we don't want a measurement // to take too long. This will curtail the number of actual epochs. - const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL; - timespec global; - CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global)); + const auto timeBudget = seconds(FLAGS_bm_max_secs); + auto global = high_resolution_clock::now(); double epochResults[epochs] = { 0 }; size_t actualEpochs = 0; for (; actualEpochs < epochs; ++actualEpochs) { - const auto maxIters = FLAGS_bm_max_iters; - for (unsigned int n = FLAGS_bm_min_iters; n < maxIters; n *= 2) { - auto const nsecsAndIter = fun(n); + const auto maxIters = uint32_t(FLAGS_bm_max_iters); + for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= 2) { + auto const nsecsAndIter = fun(static_cast(n)); if (nsecsAndIter.first < minNanoseconds) { continue; } // We got an accurate enough timing, done. But only save if // smaller than the current result. - epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) / - nsecsAndIter.second - globalBaseline); + auto nsecs = duration_cast(nsecsAndIter.first).count(); + epochResults[actualEpochs] = + max(0.0, double(nsecs) / nsecsAndIter.second - globalBaseline); // Done with the current epoch, we got a meaningful timing. break; } - timespec now; - CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &now)); - if (detail::timespecDiff(now, global) >= timeBudgetInNs) { + auto now = high_resolution_clock::now(); + if (now - global >= timeBudget) { // No more time budget available. ++actualEpochs; break; @@ -277,14 +245,14 @@ static string metricReadable(double n, unsigned int decimals) { } static void printBenchmarkResultsAsTable( - const vector >& data) { + const vector& data) { // Width available static const unsigned int columns = 76; // Compute the longest benchmark name size_t longestName = 0; - FOR_EACH_RANGE (i, 1, benchmarks().size()) { - longestName = max(longestName, get<1>(benchmarks()[i]).size()); + for (auto& bm : benchmarks()) { + longestName = max(longestName, bm.name.size()); } // Print a horizontal rule @@ -304,14 +272,14 @@ static void printBenchmarkResultsAsTable( string lastFile; for (auto& datum : data) { - auto file = get<0>(datum); + auto file = datum.file; if (file != lastFile) { // New file starting header(file); lastFile = file; } - string s = get<1>(datum); + string s = datum.name; if (s == "-") { separator('-'); continue; @@ -321,11 +289,11 @@ static void printBenchmarkResultsAsTable( s.erase(0, 1); useBaseline = true; } else { - baselineNsPerIter = get<2>(datum); + baselineNsPerIter = datum.timeInNs; useBaseline = false; } s.resize(columns - 29, ' '); - auto nsPerIter = get<2>(datum); + auto nsPerIter = datum.timeInNs; auto secPerIter = nsPerIter / 1E9; auto itersPerSec = (secPerIter == 0) ? std::numeric_limits::infinity() @@ -350,34 +318,141 @@ static void printBenchmarkResultsAsTable( } static void printBenchmarkResultsAsJson( - const vector >& data) { + const vector& data) { dynamic d = dynamic::object; for (auto& datum: data) { - d[std::get<1>(datum)] = std::get<2>(datum) * 1000.; + d[datum.name] = datum.timeInNs * 1000.; } printf("%s\n", toPrettyJson(d).c_str()); } -static void printBenchmarkResults( - const vector >& data) { +static void printBenchmarkResultsAsVerboseJson( + const vector& data) { + dynamic d; + benchmarkResultsToDynamic(data, d); + printf("%s\n", toPrettyJson(d).c_str()); +} - if (FLAGS_json) { +static void printBenchmarkResults(const vector& data) { + if (FLAGS_json_verbose) { + printBenchmarkResultsAsVerboseJson(data); + } else if (FLAGS_json) { printBenchmarkResultsAsJson(data); } else { printBenchmarkResultsAsTable(data); } } +void benchmarkResultsToDynamic( + const vector& data, + dynamic& out) { + out = dynamic::array; + for (auto& datum : data) { + out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs)); + } +} + +void benchmarkResultsFromDynamic( + const dynamic& d, + vector& results) { + for (auto& datum : d) { + results.push_back( + {datum[0].asString(), datum[1].asString(), datum[2].asDouble()}); + } +} + +static pair resultKey( + const detail::BenchmarkResult& result) { + return pair(result.file, result.name); +} + +void printResultComparison( + const vector& base, + const vector& test) { + map, double> baselines; + + for (auto& baseResult : base) { + baselines[resultKey(baseResult)] = baseResult.timeInNs; + } + // + // Width available + static const unsigned int columns = 76; + + // Compute the longest benchmark name + size_t longestName = 0; + for (auto& datum : test) { + longestName = max(longestName, datum.name.size()); + } + + // Print a horizontal rule + auto separator = [&](char pad) { puts(string(columns, pad).c_str()); }; + + // Print header for a file + auto header = [&](const string& file) { + separator('='); + printf("%-*srelative time/iter iters/s\n", columns - 28, file.c_str()); + separator('='); + }; + + string lastFile; + + for (auto& datum : test) { + folly::Optional baseline = + folly::get_optional(baselines, resultKey(datum)); + auto file = datum.file; + if (file != lastFile) { + // New file starting + header(file); + lastFile = file; + } + + string s = datum.name; + if (s == "-") { + separator('-'); + continue; + } + if (s[0] == '%') { + s.erase(0, 1); + } + s.resize(columns - 29, ' '); + auto nsPerIter = datum.timeInNs; + auto secPerIter = nsPerIter / 1E9; + auto itersPerSec = (secPerIter == 0) + ? std::numeric_limits::infinity() + : (1 / secPerIter); + if (!baseline) { + // Print without baseline + printf( + "%*s %9s %7s\n", + static_cast(s.size()), + s.c_str(), + readableTime(secPerIter, 2).c_str(), + metricReadable(itersPerSec, 2).c_str()); + } else { + // Print with baseline + auto rel = *baseline / nsPerIter * 100.0; + printf( + "%*s %7.2f%% %9s %7s\n", + static_cast(s.size()), + s.c_str(), + rel, + readableTime(secPerIter, 2).c_str(), + metricReadable(itersPerSec, 2).c_str()); + } + } + separator('='); +} + void runBenchmarks() { CHECK(!benchmarks().empty()); - vector> results; + vector results; results.reserve(benchmarks().size() - 1); std::unique_ptr bmRegex; if (!FLAGS_bm_regex.empty()) { - bmRegex.reset(new boost::regex(FLAGS_bm_regex)); + bmRegex = std::make_unique(FLAGS_bm_regex); } // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS. @@ -385,21 +460,20 @@ void runBenchmarks() { size_t baselineIndex = getGlobalBenchmarkBaselineIndex(); auto const globalBaseline = - runBenchmarkGetNSPerIteration(get<2>(benchmarks()[baselineIndex]), 0); + runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, 0); FOR_EACH_RANGE (i, 0, benchmarks().size()) { if (i == baselineIndex) { continue; } double elapsed = 0.0; - if (get<1>(benchmarks()[i]) != "-") { // skip separators - if (bmRegex && !boost::regex_search(get<1>(benchmarks()[i]), *bmRegex)) { + auto& bm = benchmarks()[i]; + if (bm.name != "-") { // skip separators + if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) { continue; } - elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks()[i]), - globalBaseline); + elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline); } - results.emplace_back(get<0>(benchmarks()[i]), - get<1>(benchmarks()[i]), elapsed); + results.push_back({bm.file, bm.name, elapsed}); } // PLEASE MAKE NOISE. MEASUREMENTS DONE.