/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "Foreach.h"
#include "json.h"
#include "String.h"
+
#include <algorithm>
+#include <boost/regex.hpp>
#include <cmath>
#include <iostream>
#include <limits>
DEFINE_bool(benchmark, false, "Run benchmarks.");
DEFINE_bool(json, false, "Output in JSON format.");
+DEFINE_string(bm_regex, "",  // compiled into a boost::regex and applied with regex_search to each benchmark name
+ "Only benchmarks whose names match this regex will be run.");
+
+DEFINE_int64(bm_min_usec, 100,  // converted to nanoseconds and used as a floor for the shortest accepted measurement
+ "Minimum # of microseconds we'll accept for each benchmark.");
+
+DEFINE_int64(bm_min_iters, 1,  // starting iteration count; doubled until a measurement is long enough
+ "Minimum # of iterations we'll try for each benchmark.");
+
+DEFINE_int32(bm_max_secs, 1,  // converted to nanoseconds to form the time budget of one benchmark
+ "Maximum # of seconds we'll spend on each benchmark.");
+
+
namespace folly {
BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
-typedef function<uint64_t(unsigned int)> BenchmarkFun;
+typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks;
// Add the global baseline
BENCHMARK(globalBenchmarkBaseline) {
+#ifdef _MSC_VER  // MSVC build
+ _ReadWriteBarrier();  // compiler-barrier intrinsic standing in for the GNU-style asm statement below
+#else  // every other compiler keeps the empty asm statement
asm volatile("");
+#endif
}
void detail::addBenchmarkImpl(const char* file, const char* name,
CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
resolutionInNs = ts.tv_nsec;
}
- // Whe choose a minimum minimum (sic) of 10,000 nanoseconds, but if
+ // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
// the clock resolution is worse than that, it will be larger. In
// essence we're aiming at making the quantization noise 0.01%.
- static const auto minNanoseconds = min(resolutionInNs * 100000, 1000000000UL);
+ static const auto minNanoseconds =
+ max<uint64_t>(FLAGS_bm_min_usec * 1000UL,
+ min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
// We do measurements in several epochs and take the minimum, to
// account for jitter.
static const unsigned int epochs = 1000;
// We establish a total time budget as we don't want a measurement
// to take too long. This will curtail the number of actual epochs.
- static const uint64_t timeBudgetInNs = 1000000000;
+ // Multiply in 64 bits. FLAGS_bm_max_secs is an int32, so with a plain int
+ // literal the product is computed in int and overflows (undefined
+ // behavior) for any budget above 2 seconds.
+ const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL;
timespec global;
CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
size_t actualEpochs = 0;
for (; actualEpochs < epochs; ++actualEpochs) {
- for (unsigned int n = 1; n < (1U << 30); n *= 2) {
- auto const nsecs = fun(n);
- if (nsecs < minNanoseconds) {
+ // Start from at least one iteration. With --bm_min_iters=0 the counter
+ // would begin at 0 and doubling it never makes progress, so the loop
+ // could spin forever; a negative value would wrap to a huge unsigned
+ // count and skip the loop without recording a result.
+ for (unsigned int n = max<int64_t>(1, FLAGS_bm_min_iters);
+ n < (1UL << 30); n *= 2) {
+ auto const nsecsAndIter = fun(n);
+ if (nsecsAndIter.first < minNanoseconds) {
continue;
}
// We got an accurate enough timing, done. But only save if
// smaller than the current result.
- epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline);
+ epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
+ nsecsAndIter.second - globalBaseline);
// Done with the current epoch, we got a meaningful timing.
break;
}
return max(0.0, estimateTime(epochResults, epochResults + actualEpochs));
}
-static string humanReadable(double n, unsigned int decimals) {
- auto a = fabs(n);
- char suffix = ' ';
-
- if (a >= 1E21) {
- // Too big to be comprehended by the puny human brain
- suffix = '!';
- n /= 1E21;
- } else if (a >= 1E18) {
- // "EXA" written with suffix 'X' so as to not create confusion
- // with scientific notation.
- suffix = 'X';
- n /= 1E18;
- } else if (a >= 1E15) {
- // "PETA"
- suffix = 'P';
- n /= 1E15;
- } else if (a >= 1E12) {
- // "TERA"
- suffix = 'T';
- n /= 1E12;
- } else if (a >= 1E9) {
- // "GIGA"
- suffix = 'G';
- n /= 1E9;
- } else if (a >= 1E6) {
- // "MEGA"
- suffix = 'M';
- n /= 1E6;
- } else if (a >= 1E3) {
- // "KILO"
- suffix = 'K';
- n /= 1E3;
- } else if (a == 0.0) {
- suffix = ' ';
- } else if (a < 1E-15) {
- // too small
- suffix = '?';
- n *= 1E18;
- } else if (a < 1E-12) {
- // "femto"
- suffix = 'f';
- n *= 1E15;
- } else if (a < 1E-9) {
- // "pico"
- suffix = 'p';
- n *= 1E12;
- } else if (a < 1E-6) {
- // "nano"
- suffix = 'n';
- n *= 1E9;
- } else if (a < 1E-3) {
- // "micro"
- suffix = 'u';
- n *= 1E6;
- } else if (a < 1) {
- // "mili"
- suffix = 'm';
- n *= 1E3;
+struct ScaleInfo {  // one row of a unit table consumed by humanReadable
+ double boundary;  // smallest magnitude shown with this unit; also the divisor applied to the value
+ const char* suffix;  // text appended after the number; nullptr marks the end of a table
+};
+
+static const ScaleInfo kTimeSuffixes[] {  // time units, largest first; boundaries are expressed in seconds
+ { 365.25 * 24 * 3600, "years" },  // a year is taken as 365.25 days
+ { 24 * 3600, "days" },
+ { 3600, "hr" },
+ { 60, "min" },
+ { 1, "s" },
+ { 1E-3, "ms" },
+ { 1E-6, "us" },
+ { 1E-9, "ns" },
+ { 1E-12, "ps" },
+ { 1E-15, "fs" },
+ { 0, nullptr },  // sentinel: a nullptr suffix ends the table
+};
+
+static const ScaleInfo kMetricSuffixes[] {  // metric prefixes, largest first
+ { 1E24, "Y" }, // yotta
+ { 1E21, "Z" }, // zetta
+ { 1E18, "X" }, // "exa" written with suffix 'X' so as to not create
+ // confusion with scientific notation
+ { 1E15, "P" }, // peta
+ { 1E12, "T" }, // tera
+ { 1E9, "G" }, // giga
+ { 1E6, "M" }, // mega
+ { 1E3, "K" }, // kilo
+ { 1, "" }, // no prefix
+ { 1E-3, "m" }, // milli
+ { 1E-6, "u" }, // micro
+ { 1E-9, "n" }, // nano
+ { 1E-12, "p" }, // pico
+ { 1E-15, "f" }, // femto
+ { 1E-18, "a" }, // atto
+ { 1E-21, "z" }, // zepto
+ { 1E-24, "y" }, // yocto
+ { 0, nullptr }, // sentinel: a nullptr suffix ends the table
+};
+
+static string humanReadable(double n, unsigned int decimals,  // formats n using the best-fitting unit from scales
+ const ScaleInfo* scales) {  // scales: expected in descending boundary order, ended by a nullptr-suffix entry
+ if (std::isinf(n) || std::isnan(n)) {  // non-finite values cannot be scaled
+ return folly::to<string>(n);  // returned as converted, with no unit suffix
+ }
+
+ const double absValue = fabs(n);  // the sign plays no part in choosing the unit
+ const ScaleInfo* scale = scales;
+ while (absValue < scale[0].boundary && scale[1].suffix != nullptr) {  // skip units whose boundary exceeds |n|
+ ++scale;  // never steps onto the sentinel, so 0 and very small values end on the last real unit
}
- return stringPrintf("%*.*f%c", decimals + 3 + 1, decimals, n, suffix);
+ const double scaledValue = n / scale->boundary;  // express n in the chosen unit
+ return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);  // decimals = digits after the decimal point
+}
+
+static string readableTime(double n, unsigned int decimals) {  // n is a duration in seconds; the unit comes from kTimeSuffixes
+ return humanReadable(n, decimals, kTimeSuffixes);
+}
+
+static string metricReadable(double n, unsigned int decimals) {  // formats n with a metric prefix taken from kMetricSuffixes
+ return humanReadable(n, decimals, kMetricSuffixes);
}
static void printBenchmarkResultsAsTable(
const vector<tuple<const char*, const char*, double> >& data) {
// Width available
- static const uint columns = 76;
+ static const unsigned int columns = 76;
// Compute the longest benchmark name
size_t longestName = 0;
// Print header for a file
auto header = [&](const char* file) {
separator('=');
- printf("%-*srelative ns/iter iters/s\n",
- columns - 26, file);
+ printf("%-*srelative time/iter iters/s\n",
+ columns - 28, file);
separator('=');
};
baselineNsPerIter = get<2>(datum);
useBaseline = false;
}
- s.resize(columns - 27, ' ');
+ s.resize(columns - 29, ' ');
auto nsPerIter = get<2>(datum);
- auto itersPerSec = 1E9 / nsPerIter;
+ auto secPerIter = nsPerIter / 1E9;
+ auto itersPerSec = 1 / secPerIter;
if (!useBaseline) {
// Print without baseline
- printf("%*s %s %s\n",
+ printf("%*s %9s %7s\n",
static_cast<int>(s.size()), s.c_str(),
- humanReadable(nsPerIter, 2).c_str(),
- humanReadable(itersPerSec, 2).c_str());
+ readableTime(secPerIter, 2).c_str(),
+ metricReadable(itersPerSec, 2).c_str());
} else {
// Print with baseline
auto rel = baselineNsPerIter / nsPerIter * 100.0;
- printf("%*s %7.2f%% %s %s\n",
+ printf("%*s %7.2f%% %9s %7s\n",
static_cast<int>(s.size()), s.c_str(),
rel,
- humanReadable(nsPerIter, 2).c_str(),
- humanReadable(itersPerSec, 2).c_str());
+ readableTime(secPerIter, 2).c_str(),
+ metricReadable(itersPerSec, 2).c_str());
}
}
separator('=');
vector<tuple<const char*, const char*, double>> results;
results.reserve(benchmarks.size() - 1);
+ std::unique_ptr<boost::regex> bmRegex;
+ if (!FLAGS_bm_regex.empty()) {
+ bmRegex.reset(new boost::regex(FLAGS_bm_regex));
+ }
+
// PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
auto const globalBaseline = runBenchmarkGetNSPerIteration(
get<2>(benchmarks.front()), 0);
FOR_EACH_RANGE (i, 1, benchmarks.size()) {
- auto elapsed = strcmp(get<1>(benchmarks[i]), "-") == 0
- ? 0.0 // skip the separators
- : runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]),
- globalBaseline);
+ double elapsed = 0.0;
+ if (strcmp(get<1>(benchmarks[i]), "-") != 0) { // skip separators
+ if (bmRegex && !boost::regex_search(get<1>(benchmarks[i]), *bmRegex)) {
+ continue;
+ }
+ elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]),
+ globalBaseline);
+ }
results.emplace_back(get<0>(benchmarks[i]),
get<1>(benchmarks[i]), elapsed);
}