/*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
-#include "Benchmark.h"
-#include "Foreach.h"
-#include "json.h"
-#include "String.h"
+#include <folly/Benchmark.h>
+#include <folly/Foreach.h>
+#include <folly/json.h>
+#include <folly/String.h>
#include <algorithm>
#include <boost/regex.hpp>
#include <limits>
#include <utility>
#include <vector>
+#include <cstring>
using namespace std;
DEFINE_bool(benchmark, false, "Run benchmarks.");  // flag definitions; the json and bm_* flags are read further down as FLAGS_<name>
DEFINE_bool(json, false, "Output in JSON format.");  // checked in printBenchmarkResults() to pick the JSON printer
-DEFINE_string(bm_regex, "",
- "Only benchmarks whose names match this regex will be run.");
+DEFINE_string(
+ bm_regex,
+ "",
+ "Only benchmarks whose names match this regex will be run.");  // empty (the default) means no filter; otherwise names are tested with boost::regex_search
-DEFINE_int64(bm_min_usec, 100,
- "Minimum # of microseconds we'll accept for each benchmark.");
+DEFINE_int64(
+ bm_min_usec,
+ 100,
+ "Minimum # of microseconds we'll accept for each benchmark.");  // multiplied by 1000 to form one lower bound of minNanoseconds
-DEFINE_int64(bm_min_iters, 1,
- "Minimum # of iterations we'll try for each benchmark.");
+DEFINE_int32(
+ bm_min_iters,
+ 1,
+ "Minimum # of iterations we'll try for each benchmark.");  // initial value of the iteration count n, which is doubled on each retry
-DEFINE_int32(bm_max_secs, 1,
- "Maximum # of seconds we'll spend on each benchmark.");
+DEFINE_int64(
+ bm_max_iters,
+ 1L << 30L,  // 2^30 = 1073741824
+ "Maximum # of iterations we'll try for each benchmark.");  // exclusive upper bound for n in the doubling loop (n < maxIters)
+DEFINE_int32(
+ bm_max_secs,
+ 1,
+ "Maximum # of seconds we'll spend on each benchmark.");  // multiplied by 1000000000ULL to form timeBudgetInNs
namespace folly {
BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;  // definition of the static data member BenchmarkSuspender::nsSpent
-typedef function<uint64_t(unsigned int)> BenchmarkFun;
-static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks;
+typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;  // called with an iteration count n; the measurement loop reads the result's .first as elapsed ns and .second as the iteration count to divide by
+
+// Returns a mutable reference to the single registry of benchmarks; each entry is a (file, name, function) tuple.
+vector<tuple<string, string, BenchmarkFun>>& benchmarks() {
+ static vector<tuple<string, string, BenchmarkFun>> _benchmarks;  // function-local static, constructed on first call; presumably chosen so registrations from other translation units cannot run before the vector exists — confirm
+ return _benchmarks;
+}
+
+#define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline  // one spelling of the baseline's identifier, shared by the BENCHMARK below and getGlobalBenchmarkBaselineIndex()
+#define FB_STRINGIZE_X2(x) FB_STRINGIZE(x)  // extra expansion level: x is macro-expanded before FB_STRINGIZE (defined elsewhere) receives it
// Add the global baseline
-BENCHMARK(globalBenchmarkBaseline) {
+BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) {  // runBenchmarks() measures this entry first and passes its ns/iteration to every other benchmark as globalBaseline
+#ifdef _MSC_VER
+ _ReadWriteBarrier();  // MSVC branch, used in place of the empty asm statement below
+#else
asm volatile("");  // empty volatile asm statement; presumably keeps the otherwise-empty body from being optimized away — confirm
+#endif
}
+int getGlobalBenchmarkBaselineIndex() {  // Returns the position within benchmarks() of the global baseline entry.
+ const char *global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);  // presumably the baseline's name as a string literal (depends on FB_STRINGIZE, defined elsewhere)
+ auto it = std::find_if(
+ benchmarks().begin(),
+ benchmarks().end(),
+ [global](const tuple<string, string, BenchmarkFun> &v) {
+ return get<1>(v) == global;  // tuple element 1 is the benchmark name
+ }
+ );
+ CHECK(it != benchmarks().end());  // the CHECK fails when no registered entry carries the baseline name
+ return it - benchmarks().begin();  // the iterator difference is narrowed to int by the return type
+}
+
+#undef FB_STRINGIZE_X2
+#undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
+
void detail::addBenchmarkImpl(const char* file, const char* name,
BenchmarkFun fun) {  // Appends one (file, name, fun) entry to the benchmark registry.
- benchmarks.emplace_back(file, name, std::move(fun));
+ benchmarks().emplace_back(file, name, std::move(fun));  // file and name initialize the tuple's string elements, so the registry keeps its own copies; fun is moved in
}
/**
// The key here is accuracy; timings that are too low mean the accuracy
// was coarse. We up the ante until we get to at least minNanoseconds
// timings.
- static uint64_t resolutionInNs = 0, coarseResolutionInNs = 0;
+ static uint64_t resolutionInNs = 0;
if (!resolutionInNs) {
timespec ts;
- CHECK_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts));
+ CHECK_EQ(0, clock_getres(CLOCK_REALTIME, &ts));
CHECK_EQ(0, ts.tv_sec) << "Clock sucks.";
CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good.";
CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
// the clock resolution is worse than that, it will be larger. In
// essence we're aiming at making the quantization noise 0.01%.
static const auto minNanoseconds =
- max(FLAGS_bm_min_usec * 1000UL,
+ max<uint64_t>(FLAGS_bm_min_usec * 1000UL,
min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
// We do measurements in several epochs and take the minimum, to
static const unsigned int epochs = 1000;
// We establish a total time budget as we don't want a measurement
// to take too long. This will curtail the number of actual epochs.
- const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000;
+ const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL;
timespec global;
CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
size_t actualEpochs = 0;
for (; actualEpochs < epochs; ++actualEpochs) {
- for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) {
- auto const nsecs = fun(n);
- if (nsecs < minNanoseconds) {
+ const auto maxIters = FLAGS_bm_max_iters;
+ for (unsigned int n = FLAGS_bm_min_iters; n < maxIters; n *= 2) {
+ auto const nsecsAndIter = fun(n);
+ if (nsecsAndIter.first < minNanoseconds) {
continue;
}
// We got an accurate enough timing, done. But only save if
// smaller than the current result.
- epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline);
+ epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
+ nsecsAndIter.second - globalBaseline);
// Done with the current epoch, we got a meaningful timing.
break;
}
{ 1E-9, "ns" },
{ 1E-12, "ps" },
{ 1E-15, "fs" },
- { 0, NULL },
+ { 0, nullptr },
};
static const ScaleInfo kMetricSuffixes[] {
{ 1E-18, "a" }, // atto
{ 1E-21, "z" }, // zepto
{ 1E-24, "y" }, // yocto
- { 0, NULL },
+ { 0, nullptr },
};
static string humanReadable(double n, unsigned int decimals,
const double absValue = fabs(n);
const ScaleInfo* scale = scales;
- while (absValue < scale[0].boundary && scale[1].suffix != NULL) {
+ while (absValue < scale[0].boundary && scale[1].suffix != nullptr) {
++scale;
}
}
static void printBenchmarkResultsAsTable(
- const vector<tuple<const char*, const char*, double> >& data) {
+ const vector<tuple<string, string, double> >& data) {
// Width available
- static const uint columns = 76;
+ static const unsigned int columns = 76;
// Compute the longest benchmark name
size_t longestName = 0;
- FOR_EACH_RANGE (i, 1, benchmarks.size()) {
- longestName = max(longestName, strlen(get<1>(benchmarks[i])));
+ FOR_EACH_RANGE (i, 1, benchmarks().size()) {
+ longestName = max(longestName, get<1>(benchmarks()[i]).size());
}
// Print a horizontal rule
};
// Print header for a file
- auto header = [&](const char* file) {
+ auto header = [&](const string& file) {
separator('=');
printf("%-*srelative time/iter iters/s\n",
- columns - 28, file);
+ columns - 28, file.c_str());
separator('=');
};
double baselineNsPerIter = numeric_limits<double>::max();
- const char* lastFile = "";
+ string lastFile;
for (auto& datum : data) {
auto file = get<0>(datum);
- if (strcmp(file, lastFile)) {
+ if (file != lastFile) {
// New file starting
header(file);
lastFile = file;
s.resize(columns - 29, ' ');
auto nsPerIter = get<2>(datum);
auto secPerIter = nsPerIter / 1E9;
- auto itersPerSec = 1 / secPerIter;
+ auto itersPerSec = (secPerIter == 0)
+ ? std::numeric_limits<double>::infinity()
+ : (1 / secPerIter);
if (!useBaseline) {
// Print without baseline
printf("%*s %9s %7s\n",
}
static void printBenchmarkResultsAsJson(
- const vector<tuple<const char*, const char*, double> >& data) {
+ const vector<tuple<string, string, double> >& data) {
dynamic d = dynamic::object;
for (auto& datum: data) {
d[std::get<1>(datum)] = std::get<2>(datum) * 1000.;
}
static void printBenchmarkResults(
- const vector<tuple<const char*, const char*, double> >& data) {
+ const vector<tuple<string, string, double> >& data) {
if (FLAGS_json) {
printBenchmarkResultsAsJson(data);
}
void runBenchmarks() {
- CHECK(!benchmarks.empty());
+ CHECK(!benchmarks().empty());
- vector<tuple<const char*, const char*, double>> results;
- results.reserve(benchmarks.size() - 1);
+ vector<tuple<string, string, double>> results;
+ results.reserve(benchmarks().size() - 1);
std::unique_ptr<boost::regex> bmRegex;
if (!FLAGS_bm_regex.empty()) {
// PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
- auto const globalBaseline = runBenchmarkGetNSPerIteration(
- get<2>(benchmarks.front()), 0);
- FOR_EACH_RANGE (i, 1, benchmarks.size()) {
+ unsigned int baselineIndex = getGlobalBenchmarkBaselineIndex();
+
+ auto const globalBaseline =
+ runBenchmarkGetNSPerIteration(get<2>(benchmarks()[baselineIndex]), 0);
+ FOR_EACH_RANGE (i, 0, benchmarks().size()) {
+ if (i == baselineIndex) {
+ continue;
+ }
double elapsed = 0.0;
- if (strcmp(get<1>(benchmarks[i]), "-")) { // skip separators
- if (bmRegex && !boost::regex_search(get<1>(benchmarks[i]), *bmRegex)) {
+ if (get<1>(benchmarks()[i]) != "-") { // skip separators
+ if (bmRegex && !boost::regex_search(get<1>(benchmarks()[i]), *bmRegex)) {
continue;
}
- elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]),
+ elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks()[i]),
globalBaseline);
}
- results.emplace_back(get<0>(benchmarks[i]),
- get<1>(benchmarks[i]), elapsed);
+ results.emplace_back(get<0>(benchmarks()[i]),
+ get<1>(benchmarks()[i]), elapsed);
}
// PLEASE MAKE NOISE. MEASUREMENTS DONE.