/*
- * Copyright 2015 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
#include <folly/Benchmark.h>
-#include <folly/Foreach.h>
-#include <folly/json.h>
-#include <folly/String.h>
#include <algorithm>
-#include <boost/regex.hpp>
+#include <chrono>
#include <cmath>
+#include <cstring>
#include <iostream>
#include <limits>
+#include <map>
+#include <memory>
+#include <type_traits>
#include <utility>
#include <vector>
-#include <cstring>
+
+#include <boost/regex.hpp>
+
+#include <folly/MapUtil.h>
+#include <folly/String.h>
+#include <folly/container/Foreach.h>
+#include <folly/json.h>
using namespace std;
DEFINE_bool(benchmark, false, "Run benchmarks.");
DEFINE_bool(json, false, "Output in JSON format.");
+DEFINE_bool(json_verbose, false, "Output in verbose JSON format.");
-DEFINE_string(bm_regex, "",
- "Only benchmarks whose names match this regex will be run.");
+DEFINE_string(
+ bm_regex,
+ "",
+ "Only benchmarks whose names match this regex will be run.");
-DEFINE_int64(bm_min_usec, 100,
- "Minimum # of microseconds we'll accept for each benchmark.");
+DEFINE_int64(
+ bm_min_usec,
+ 100,
+ "Minimum # of microseconds we'll accept for each benchmark.");
-DEFINE_int64(bm_min_iters, 1,
- "Minimum # of iterations we'll try for each benchmark.");
+DEFINE_int32(
+ bm_min_iters,
+ 1,
+ "Minimum # of iterations we'll try for each benchmark.");
-DEFINE_int32(bm_max_secs, 1,
- "Maximum # of seconds we'll spend on each benchmark.");
+DEFINE_int64(
+ bm_max_iters,
+ 1 << 30,
+ "Maximum # of iterations we'll try for each benchmark.");
+DEFINE_int32(
+ bm_max_secs,
+ 1,
+ "Maximum # of seconds we'll spend on each benchmark.");
namespace folly {
-BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
+std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
-
-vector<tuple<string, string, BenchmarkFun>>& benchmarks() {
- static vector<tuple<string, string, BenchmarkFun>> _benchmarks;
+vector<detail::BenchmarkRegistration>& benchmarks() {
+ static vector<detail::BenchmarkRegistration> _benchmarks;
return _benchmarks;
}
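+
+// A rough sketch of how entries land in this registry: the BENCHMARK macro
+// (folly/Benchmark.h) defines a function and registers it at static-init
+// time through addBenchmarkImpl below, approximately like so (names are
+// illustrative):
+//
+//   BENCHMARK(fooBenchmark, iters) { /* code under test */ }
+//   // expands to roughly: the function body plus a static registrant that
+//   // calls detail::addBenchmarkImpl(__FILE__, "fooBenchmark", wrappedFun)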
#endif
}
-int getGlobalBenchmarkBaselineIndex() {
+size_t getGlobalBenchmarkBaselineIndex() {
const char *global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);
auto it = std::find_if(
- benchmarks().begin(),
- benchmarks().end(),
- [global](const tuple<string, string, BenchmarkFun> &v) {
- return get<1>(v) == global;
- }
- );
+ benchmarks().begin(),
+ benchmarks().end(),
+ [global](const detail::BenchmarkRegistration& v) {
+ return v.name == global;
+ });
CHECK(it != benchmarks().end());
- return it - benchmarks().begin();
+ return size_t(std::distance(benchmarks().begin(), it));
}
#undef FB_STRINGIZE_X2
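+
+// The baseline found above is a deliberately empty benchmark; its measured
+// per-iteration cost is subtracted from every other benchmark's time (see
+// the globalBaseline argument to runBenchmarkGetNSPerIteration below).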
void detail::addBenchmarkImpl(const char* file, const char* name,
BenchmarkFun fun) {
- benchmarks().emplace_back(file, name, std::move(fun));
-}
-
-/**
- * Given a point, gives density at that point as a number 0.0 < x <=
- * 1.0. The result is 1.0 if all samples are equal to where, and
- * decreases near 0 if all points are far away from it. The density is
- * computed with the help of a radial basis function.
- */
-static double density(const double * begin, const double *const end,
- const double where, const double bandwidth) {
- assert(begin < end);
- assert(bandwidth > 0.0);
- double sum = 0.0;
- FOR_EACH_RANGE (i, begin, end) {
- auto d = (*i - where) / bandwidth;
- sum += exp(- d * d);
- }
- return sum / (end - begin);
-}
-
-/**
- * Computes mean and variance for a bunch of data points. Note that
- * mean is currently not being used.
- */
-static pair<double, double>
-meanVariance(const double * begin, const double *const end) {
- assert(begin < end);
- double sum = 0.0, sum2 = 0.0;
- FOR_EACH_RANGE (i, begin, end) {
- sum += *i;
- sum2 += *i * *i;
- }
- auto const n = end - begin;
- return make_pair(sum / n, sqrt((sum2 - sum * sum / n) / n));
-}
-
-/**
- * Computes the mode of a sample set through brute force. Assumes
- * input is sorted.
- */
-static double mode(const double * begin, const double *const end) {
- assert(begin < end);
- // Lower bound and upper bound for result and their respective
- // densities.
- auto
- result = 0.0,
- bestDensity = 0.0;
-
- // Get the variance so we pass it down to density()
- auto const sigma = meanVariance(begin, end).second;
- if (!sigma) {
- // No variance means constant signal
- return *begin;
- }
-
- FOR_EACH_RANGE (i, begin, end) {
- assert(i == begin || *i >= i[-1]);
- auto candidate = density(begin, end, *i, sigma * sqrt(2.0));
- if (candidate > bestDensity) {
- // Found a new best
- bestDensity = candidate;
- result = *i;
- } else {
- // Density is decreasing... we could break here if we definitely
- // knew this is unimodal.
- }
- }
-
- return result;
+ benchmarks().push_back({file, name, std::move(fun)});
}
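+
+// For reference, the braced initializer above assumes a registration record
+// of roughly this shape, declared in folly/Benchmark.h (a sketch, not the
+// authoritative definition):
+//
+//   struct BenchmarkRegistration {
+//     std::string file;
+//     std::string name;
+//     std::function<detail::TimeIterPair(unsigned int)> func;
+//   };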
/**
 * Given a bunch of benchmark results, estimate the actual run time.
 */
static double estimateTime(double * begin, double * end) {
  assert(begin < end);
// Current state of the art: get the minimum. After some
// experimentation, it seems taking the minimum is the best.
-
return *min_element(begin, end);
-
- // What follows after estimates the time as the mode of the
- // distribution.
-
- // Select the awesomest (i.e. most frequent) result. We do this by
- // sorting and then computing the longest run length.
- sort(begin, end);
-
- // Eliminate outliers. A time much larger than the minimum time is
- // considered an outlier.
- while (end[-1] > 2.0 * *begin) {
- --end;
- if (begin == end) {
- LOG(INFO) << *begin;
- }
- assert(begin < end);
- }
-
- double result = 0;
-
- /* Code used just for comparison purposes */ {
- unsigned bestFrequency = 0;
- unsigned candidateFrequency = 1;
- double candidateValue = *begin;
- for (auto current = begin + 1; ; ++current) {
- if (current == end || *current != candidateValue) {
- // Done with the current run, see if it was best
- if (candidateFrequency > bestFrequency) {
- bestFrequency = candidateFrequency;
- result = candidateValue;
- }
- if (current == end) {
- break;
- }
- // Start a new run
- candidateValue = *current;
- candidateFrequency = 1;
- } else {
- // Cool, inside a run, increase the frequency
- ++candidateFrequency;
- }
- }
- }
-
- result = mode(begin, end);
-
- return result;
}
static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
const double globalBaseline) {
+ using std::chrono::duration_cast;
+ using std::chrono::high_resolution_clock;
+ using std::chrono::microseconds;
+ using std::chrono::nanoseconds;
+ using std::chrono::seconds;
+
// The key here is accuracy; numbers that are too low mean the
// accuracy was coarse. We up the ante until we get to at least
// minNanoseconds timings.
- static uint64_t resolutionInNs = 0;
- if (!resolutionInNs) {
- timespec ts;
- CHECK_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts));
- CHECK_EQ(0, ts.tv_sec) << "Clock sucks.";
- CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good.";
- CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
- resolutionInNs = ts.tv_nsec;
- }
+ static_assert(
+ std::is_same<high_resolution_clock::duration, nanoseconds>::value,
+ "High resolution clock must be nanosecond resolution.");
// We choose a minimum minimum (sic) of 100,000 nanoseconds; if
// FLAGS_bm_min_usec asks for more, the floor rises accordingly. In
// essence we're aiming at making the quantization noise 0.01%.
- static const auto minNanoseconds =
- max<uint64_t>(FLAGS_bm_min_usec * 1000UL,
- min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
+ static const auto minNanoseconds = std::max<nanoseconds>(
+ nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
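+ // Worked example: with the default --bm_min_usec=100 this is
+ // max(100,000ns, 100,000ns) == 100us; --bm_min_usec=500 would raise the
+ // floor to 500us.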
// We do measurements in several epochs and take the minimum, to
// account for jitter.
static const unsigned int epochs = 1000;
// We establish a total time budget as we don't want a measurement
// to take too long. This will curtail the number of actual epochs.
- const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL;
- timespec global;
- CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
+ const auto timeBudget = seconds(FLAGS_bm_max_secs);
+ auto global = high_resolution_clock::now();
double epochResults[epochs] = { 0 };
size_t actualEpochs = 0;
for (; actualEpochs < epochs; ++actualEpochs) {
- for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) {
- auto const nsecsAndIter = fun(n);
+ const auto maxIters = uint32_t(FLAGS_bm_max_iters);
+ for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= 2) {
+ auto const nsecsAndIter = fun(static_cast<unsigned int>(n));
if (nsecsAndIter.first < minNanoseconds) {
continue;
}
// We got an accurate enough timing, done. Record this epoch's
// per-iteration time, net of the global baseline.
- epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
- nsecsAndIter.second - globalBaseline);
+ auto nsecs = duration_cast<nanoseconds>(nsecsAndIter.first).count();
+ epochResults[actualEpochs] =
+ max(0.0, double(nsecs) / nsecsAndIter.second - globalBaseline);
// Done with the current epoch, we got a meaningful timing.
break;
}
- timespec now;
- CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &now));
- if (detail::timespecDiff(now, global) >= timeBudgetInNs) {
+ auto now = high_resolution_clock::now();
+ if (now - global >= timeBudget) {
// No more time budget available.
++actualEpochs;
break;
}
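+ // The inner loop doubles n (bm_min_iters, 2x, 4x, ...) until a single
+ // batch takes at least minNanoseconds, so one epoch costs at most about
+ // twice the smallest acceptable measurement; the value reported later is
+ // the minimum over all epochs (see estimateTime above).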
static void printBenchmarkResultsAsTable(
- const vector<tuple<string, string, double> >& data) {
+ const vector<detail::BenchmarkResult>& data) {
// Width available
static const unsigned int columns = 76;
// Compute the longest benchmark name
size_t longestName = 0;
- FOR_EACH_RANGE (i, 1, benchmarks().size()) {
- longestName = max(longestName, get<1>(benchmarks()[i]).size());
+ for (auto& bm : benchmarks()) {
+ longestName = max(longestName, bm.name.size());
}
// Print a horizontal rule
string lastFile;
for (auto& datum : data) {
- auto file = get<0>(datum);
+ auto file = datum.file;
if (file != lastFile) {
// New file starting
header(file);
lastFile = file;
}
- string s = get<1>(datum);
+ string s = datum.name;
if (s == "-") {
separator('-');
continue;
s.erase(0, 1);
useBaseline = true;
} else {
- baselineNsPerIter = get<2>(datum);
+ baselineNsPerIter = datum.timeInNs;
useBaseline = false;
}
s.resize(columns - 29, ' ');
- auto nsPerIter = get<2>(datum);
+ auto nsPerIter = datum.timeInNs;
auto secPerIter = nsPerIter / 1E9;
- auto itersPerSec = 1 / secPerIter;
+ auto itersPerSec = (secPerIter == 0)
+ ? std::numeric_limits<double>::infinity()
+ : (1 / secPerIter);
if (!useBaseline) {
// Print without baseline
printf("%*s %9s %7s\n",
}
static void printBenchmarkResultsAsJson(
- const vector<tuple<string, string, double> >& data) {
+ const vector<detail::BenchmarkResult>& data) {
dynamic d = dynamic::object;
for (auto& datum : data) {
- d[std::get<1>(datum)] = std::get<2>(datum) * 1000.;
+ d[datum.name] = datum.timeInNs * 1000.;
}
printf("%s\n", toPrettyJson(d).c_str());
}
-static void printBenchmarkResults(
- const vector<tuple<string, string, double> >& data) {
+static void printBenchmarkResultsAsVerboseJson(
+ const vector<detail::BenchmarkResult>& data) {
+ dynamic d;
+ benchmarkResultsToDynamic(data, d);
+ printf("%s\n", toPrettyJson(d).c_str());
+}
- if (FLAGS_json) {
+static void printBenchmarkResults(const vector<detail::BenchmarkResult>& data) {
+ if (FLAGS_json_verbose) {
+ printBenchmarkResultsAsVerboseJson(data);
+ } else if (FLAGS_json) {
printBenchmarkResultsAsJson(data);
} else {
printBenchmarkResultsAsTable(data);
}
}
+void benchmarkResultsToDynamic(
+ const vector<detail::BenchmarkResult>& data,
+ dynamic& out) {
+ out = dynamic::array;
+ for (auto& datum : data) {
+ out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
+ }
+}
+
+void benchmarkResultsFromDynamic(
+ const dynamic& d,
+ vector<detail::BenchmarkResult>& results) {
+ for (auto& datum : d) {
+ results.push_back(
+ {datum[0].asString(), datum[1].asString(), datum[2].asDouble()});
+ }
+}
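+
+// The dynamic form is an array of [file, name, timeInNs] triples, e.g.
+// (illustrative values only):
+//
+//   [["folly/FooBenchmark.cpp", "fooBenchmark", 42.5],
+//    ["folly/FooBenchmark.cpp", "%fooRelative", 40.1]]
+//
+// Since benchmarkResultsFromDynamic is the exact inverse, results saved from
+// one run (e.g. via --json_verbose) can be reloaded and compared against a
+// later run with printResultComparison below.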
+
+static pair<StringPiece, StringPiece> resultKey(
+ const detail::BenchmarkResult& result) {
+ return pair<StringPiece, StringPiece>(result.file, result.name);
+}
+
+void printResultComparison(
+ const vector<detail::BenchmarkResult>& base,
+ const vector<detail::BenchmarkResult>& test) {
+ map<pair<StringPiece, StringPiece>, double> baselines;
+
+ for (auto& baseResult : base) {
+ baselines[resultKey(baseResult)] = baseResult.timeInNs;
+ }
+ // Width available
+ static const unsigned int columns = 76;
+
+ // Compute the longest benchmark name
+ size_t longestName = 0;
+ for (auto& datum : test) {
+ longestName = max(longestName, datum.name.size());
+ }
+
+ // Print a horizontal rule
+ auto separator = [&](char pad) { puts(string(columns, pad).c_str()); };
+
+ // Print header for a file
+ auto header = [&](const string& file) {
+ separator('=');
+ printf("%-*srelative time/iter iters/s\n", columns - 28, file.c_str());
+ separator('=');
+ };
+
+ string lastFile;
+
+ for (auto& datum : test) {
+ folly::Optional<double> baseline =
+ folly::get_optional(baselines, resultKey(datum));
+ auto file = datum.file;
+ if (file != lastFile) {
+ // New file starting
+ header(file);
+ lastFile = file;
+ }
+
+ string s = datum.name;
+ if (s == "-") {
+ separator('-');
+ continue;
+ }
+ if (s[0] == '%') {
+ s.erase(0, 1);
+ }
+ s.resize(columns - 29, ' ');
+ auto nsPerIter = datum.timeInNs;
+ auto secPerIter = nsPerIter / 1E9;
+ auto itersPerSec = (secPerIter == 0)
+ ? std::numeric_limits<double>::infinity()
+ : (1 / secPerIter);
+ if (!baseline) {
+ // Print without baseline
+ printf(
+ "%*s %9s %7s\n",
+ static_cast<int>(s.size()),
+ s.c_str(),
+ readableTime(secPerIter, 2).c_str(),
+ metricReadable(itersPerSec, 2).c_str());
+ } else {
+ // Print with baseline
+ auto rel = *baseline / nsPerIter * 100.0;
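+ // rel is the baseline's time over this run's time, in percent: values
+ // above 100% mean this run is faster than its recorded baseline.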
+ printf(
+ "%*s %7.2f%% %9s %7s\n",
+ static_cast<int>(s.size()),
+ s.c_str(),
+ rel,
+ readableTime(secPerIter, 2).c_str(),
+ metricReadable(itersPerSec, 2).c_str());
+ }
+ }
+ separator('=');
+}
+
void runBenchmarks() {
CHECK(!benchmarks().empty());
- vector<tuple<string, string, double>> results;
+ vector<detail::BenchmarkResult> results;
results.reserve(benchmarks().size() - 1);
std::unique_ptr<boost::regex> bmRegex;
if (!FLAGS_bm_regex.empty()) {
- bmRegex.reset(new boost::regex(FLAGS_bm_regex));
+ bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
}
// PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
- unsigned int baselineIndex = getGlobalBenchmarkBaselineIndex();
+ size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
auto const globalBaseline =
- runBenchmarkGetNSPerIteration(get<2>(benchmarks()[baselineIndex]), 0);
+ runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, 0);
FOR_EACH_RANGE (i, 0, benchmarks().size()) {
if (i == baselineIndex) {
continue;
}
double elapsed = 0.0;
- if (get<1>(benchmarks()[i]) != "-") { // skip separators
- if (bmRegex && !boost::regex_search(get<1>(benchmarks()[i]), *bmRegex)) {
+ auto& bm = benchmarks()[i];
+ if (bm.name != "-") { // skip separators
+ if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
continue;
}
- elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks()[i]),
- globalBaseline);
+ elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline);
}
- results.emplace_back(get<0>(benchmarks()[i]),
- get<1>(benchmarks()[i]), elapsed);
+ results.push_back({bm.file, bm.name, elapsed});
}
// PLEASE MAKE NOISE. MEASUREMENTS DONE.