/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_BENCHMARK_H_
-#define FOLLY_BENCHMARK_H_
+#pragma once
+
+#include <folly/Portability.h>
+#include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE
+#include <folly/ScopeGuard.h>
+#include <folly/Traits.h>
+#include <folly/portability/GFlags.h>
-#include "folly/Preprocessor.h" // for FB_ANONYMOUS_VARIABLE
#include <cassert>
-#include <ctime>
-#include <boost/function_types/function_arity.hpp>
+#include <chrono>
#include <functional>
-#include <glog/logging.h>
-#include <gflags/gflags.h>
#include <limits>
+#include <type_traits>
-DECLARE_bool(benchmark);
+#include <boost/function_types/function_arity.hpp>
+#include <glog/logging.h>
+DECLARE_bool(benchmark);
namespace folly {
namespace detail {
-/**
- * This is the clock ID used for measuring time. On older kernels, the
- * resolution of this clock will be very coarse, which will cause the
- * benchmarks to fail.
- */
-enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
+using TimeIterPair =
+ std::pair<std::chrono::high_resolution_clock::duration, unsigned int>;
+using BenchmarkFun = std::function<detail::TimeIterPair(unsigned int)>;
+
+struct BenchmarkRegistration {
+ std::string file;
+ std::string name;
+ BenchmarkFun func;
+};
+
+struct BenchmarkResult {
+ std::string file;
+ std::string name;
+ double timeInNs;
+};
/**
* Adds a benchmark wrapped in a std::function. Only used
*/
void addBenchmarkImpl(const char* file,
const char* name,
- std::function<uint64_t(unsigned int)>);
-
-/**
- * Takes the difference between two timespec values. end is assumed to
- * occur after start.
- */
-inline uint64_t timespecDiff(timespec end, timespec start) {
- if (end.tv_sec == start.tv_sec) {
- assert(end.tv_nsec >= start.tv_nsec);
- return end.tv_nsec - start.tv_nsec;
- }
- assert(end.tv_sec > start.tv_sec &&
- end.tv_sec - start.tv_sec <
- std::numeric_limits<uint64_t>::max() / 1000000000UL);
- return (end.tv_sec - start.tv_sec) * 1000000000UL
- + end.tv_nsec - start.tv_nsec;
-}
-
-/**
- * Takes the difference between two sets of timespec values. The first
- * two come from a high-resolution clock whereas the other two come
- * from a low-resolution clock. The crux of the matter is that
- * high-res values may be bogus as documented in
- * http://linux.die.net/man/3/clock_gettime. The trouble is when the
- * running process migrates from one CPU to another, which is more
- * likely for long-running processes. Therefore we watch for high
- * differences between the two timings.
- *
- * This function is subject to further improvements.
- */
-inline uint64_t timespecDiff(timespec end, timespec start,
- timespec endCoarse, timespec startCoarse) {
- auto fine = timespecDiff(end, start);
- auto coarse = timespecDiff(endCoarse, startCoarse);
- if (coarse - fine >= 1000000) {
- // The fine time is in all likelihood bogus
- return coarse;
- }
- return fine;
-}
+ std::function<TimeIterPair(unsigned int)>);
} // namespace detail
* Supporting type for BENCHMARK_SUSPEND defined below.
*/
struct BenchmarkSuspender {
+ using Clock = std::chrono::high_resolution_clock;
+ using TimePoint = Clock::time_point;
+ using Duration = Clock::duration;
+
BenchmarkSuspender() {
- CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
+ start = Clock::now();
}
BenchmarkSuspender(const BenchmarkSuspender &) = delete;
- BenchmarkSuspender(BenchmarkSuspender && rhs) {
+ BenchmarkSuspender(BenchmarkSuspender && rhs) noexcept {
start = rhs.start;
- rhs.start.tv_nsec = rhs.start.tv_sec = 0;
+ rhs.start = {};
}
BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
- if (start.tv_nsec > 0 || start.tv_sec > 0) {
+ if (start != TimePoint{}) {
tally();
}
start = rhs.start;
- rhs.start.tv_nsec = rhs.start.tv_sec = 0;
+ rhs.start = {};
return *this;
}
~BenchmarkSuspender() {
- if (start.tv_nsec > 0 || start.tv_sec > 0) {
+ if (start != TimePoint{}) {
tally();
}
}
void dismiss() {
- assert(start.tv_nsec > 0 || start.tv_sec > 0);
+ assert(start != TimePoint{});
tally();
- start.tv_nsec = start.tv_sec = 0;
+ start = {};
}
void rehire() {
- assert(start.tv_nsec == 0 || start.tv_sec == 0);
- CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
+ assert(start == TimePoint{});
+ start = Clock::now();
+ }
+
+ template <class F>
+ auto dismissing(F f) -> typename std::result_of<F()>::type {
+ SCOPE_EXIT { rehire(); };
+ dismiss();
+ return f();
}
/**
- * This helps the macro definition. To get around the dangers of
- * operator bool, returns a pointer to member (which allows no
- * arithmetic).
+ * This is for use inside of if-conditions, used in BENCHMARK macros.
+ * In an if-condition the contextual conversion to bool applies even
+ * though operator bool is marked explicit.
*/
- operator int BenchmarkSuspender::*() const {
- return nullptr;
+ explicit operator bool() const {
+ return false;
}
/**
- * Accumulates nanoseconds spent outside benchmark.
+ * Accumulates time spent outside benchmark.
*/
- typedef uint64_t NanosecondsSpent;
- static NanosecondsSpent nsSpent;
+ static Duration timeSpent;
-private:
+ private:
void tally() {
- timespec end;
- CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
- nsSpent += detail::timespecDiff(end, start);
+ auto end = Clock::now();
+ timeSpent += end - start;
start = end;
}
- timespec start;
+ TimePoint start;
};
/**
>::type
addBenchmark(const char* file, const char* name, Lambda&& lambda) {
auto execute = [=](unsigned int times) {
- BenchmarkSuspender::nsSpent = 0;
- timespec start, end;
+ BenchmarkSuspender::timeSpent = {};
+ unsigned int niter;
// CORE MEASUREMENT STARTS
- auto const r1 = clock_gettime(detail::DEFAULT_CLOCK_ID, &start);
- lambda(times);
- auto const r2 = clock_gettime(detail::DEFAULT_CLOCK_ID, &end);
+ auto start = std::chrono::high_resolution_clock::now();
+ niter = lambda(times);
+ auto end = std::chrono::high_resolution_clock::now();
// CORE MEASUREMENT ENDS
- CHECK_EQ(0, r1);
- CHECK_EQ(0, r2);
-
- return detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent;
+ return detail::TimeIterPair(
+ (end - start) - BenchmarkSuspender::timeSpent, niter);
};
detail::addBenchmarkImpl(file, name,
- std::function<uint64_t(unsigned int)>(execute));
+ std::function<detail::TimeIterPair(unsigned int)>(execute));
}
/**
>::type
addBenchmark(const char* file, const char* name, Lambda&& lambda) {
addBenchmark(file, name, [=](unsigned int times) {
+ unsigned int niter = 0;
while (times-- > 0) {
- lambda();
+ niter += lambda();
}
+ return niter;
});
}
/**
- * Call doNotOptimizeAway(var) against variables that you use for
+ * Call doNotOptimizeAway(var) to ensure that var will be computed even
+ * post-optimization. Use it for variables that are computed during
* benchmarking but otherwise are useless. The compiler tends to do a
- * good job at eliminating unused variables, and this function fools
- * it into thinking var is in fact needed.
+ * good job at eliminating unused variables, and this function fools it
+ * into thinking var is in fact needed.
+ *
+ * Call makeUnpredictable(var) when you don't want the optimizer to use
+ * its knowledge of var to shape the following code. This is useful
+ * when constant propagation or strength reduction is possible during your
+ * benchmark but not in real use cases.
*/
+
+#ifdef _MSC_VER
+
+#pragma optimize("", off)
+
+inline void doNotOptimizeDependencySink(const void*) {}
+
+#pragma optimize("", on)
+
template <class T>
-void doNotOptimizeAway(T&& datum) {
- asm volatile("" : "+r" (datum));
+void doNotOptimizeAway(const T& datum) {
+ doNotOptimizeDependencySink(&datum);
+}
+
+template <typename T>
+void makeUnpredictable(T& datum) {
+ doNotOptimizeDependencySink(&datum);
}
+#else
+
+namespace detail {
+template <typename T>
+struct DoNotOptimizeAwayNeedsIndirect {
+ using Decayed = typename std::decay<T>::type;
+
+ // First two constraints ensure it can be an "r" operand.
+ // std::is_pointer check is because callers seem to expect that
+ // doNotOptimizeAway(&x) is equivalent to doNotOptimizeAway(x).
+ constexpr static bool value = !folly::IsTriviallyCopyable<Decayed>::value ||
+ sizeof(Decayed) > sizeof(long) || std::is_pointer<Decayed>::value;
+};
+} // namespace detail
+
+template <typename T>
+auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
+ !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+ // The "r" constraint forces the compiler to make datum available
+ // in a register to the asm block, which means that it must have
+ // computed/loaded it. We use this path for things that are <=
+ // sizeof(long) (they have to fit), trivial (otherwise the compiler
+ // doesn't want to put them in a register), and not a pointer (because
+ // doNotOptimizeAway(&foo) would otherwise be a foot gun that didn't
+ // necessarily compute foo).
+ //
+ // An earlier version of this method had a more permissive input operand
+ // constraint, but that caused unnecessary variation between clang and
+ // gcc benchmarks.
+ asm volatile("" ::"r"(datum));
+}
+
+template <typename T>
+auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
+ detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+ // This version of doNotOptimizeAway tells the compiler that the asm
+ // block will read datum from memory, and that in addition it might read
+ // or write from any memory location. If the memory clobber could be
+ // separated into input and output that would be preferable.
+ asm volatile("" ::"m"(datum) : "memory");
+}
+
+template <typename T>
+auto makeUnpredictable(T& datum) -> typename std::enable_if<
+ !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+ asm volatile("" : "+r"(datum));
+}
+
+template <typename T>
+auto makeUnpredictable(T& datum) -> typename std::enable_if<
+ detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+ asm volatile("" ::"m"(datum) : "memory");
+}
+
+#endif
+
+struct dynamic;
+
+void benchmarkResultsToDynamic(
+ const std::vector<detail::BenchmarkResult>& data,
+ dynamic&);
+
+void benchmarkResultsFromDynamic(
+ const dynamic&,
+ std::vector<detail::BenchmarkResult>&);
+
+void printResultComparison(
+ const std::vector<detail::BenchmarkResult>& base,
+ const std::vector<detail::BenchmarkResult>& test);
+
} // namespace folly
/**
* Introduces a benchmark function. Used internally, see BENCHMARK and
* friends below.
*/
-#define BENCHMARK_IMPL(funName, stringName, paramType, paramName) \
+#define BENCHMARK_IMPL(funName, stringName, rv, paramType, paramName) \
static void funName(paramType); \
static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
::folly::addBenchmark(__FILE__, stringName, \
- [](paramType paramName) { funName(paramName); }), \
+ [](paramType paramName) -> unsigned { funName(paramName); \
+ return rv; }), \
true); \
static void funName(paramType paramName)
/**
- * Introduces a benchmark function. Use with either one one or two
- * arguments. The first is the name of the benchmark. Use something
- * descriptive, such as insertVectorBegin. The second argument may be
- * missing, or could be a symbolic counter. The counter dictates how
- * many internal iteration the benchmark does. Example:
+ * Introduces a benchmark function with support for returning the actual
+ * number of iterations. Used internally, see BENCHMARK_MULTI and friends
+ * below.
+ */
+#define BENCHMARK_MULTI_IMPL(funName, stringName, paramType, paramName) \
+ static unsigned funName(paramType); \
+ static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
+ ::folly::addBenchmark(__FILE__, stringName, \
+ [](paramType paramName) { return funName(paramName); }), \
+ true); \
+ static unsigned funName(paramType paramName)
+
+/**
+ * Introduces a benchmark function. Use with either one or two arguments.
+ * The first is the name of the benchmark. Use something descriptive, such
+ * as insertVectorBegin. The second argument may be missing, or could be a
+ * symbolic counter. The counter dictates how many internal iterations the
+ * benchmark does. Example:
*
* BENCHMARK(vectorPushBack) {
* vector<int> v;
*/
#define BENCHMARK(name, ...) \
BENCHMARK_IMPL( \
+ name, \
+ FB_STRINGIZE(name), \
+ FB_ARG_2_OR_1(1, ## __VA_ARGS__), \
+ FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
+ __VA_ARGS__)
+
+/**
+ * Like BENCHMARK above, but allows the user to return the actual
+ * number of iterations executed in the function body. This can be
+ * useful if the benchmark function doesn't know upfront how many
+ * iterations it's going to run or if it runs through a certain
+ * number of test cases, e.g.:
+ *
+ * BENCHMARK_MULTI(benchmarkSomething) {
+ * std::vector<int> testCases { 0, 1, 1, 2, 3, 5 };
+ * for (int c : testCases) {
+ * doSomething(c);
+ * }
+ * return testCases.size();
+ * }
+ */
+#define BENCHMARK_MULTI(name, ...) \
+ BENCHMARK_MULTI_IMPL( \
name, \
FB_STRINGIZE(name), \
FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
#define BENCHMARK_PARAM(name, param) \
BENCHMARK_NAMED_PARAM(name, param, param)
+/**
+ * Same as BENCHMARK_PARAM, but allows one to return the actual number of
+ * iterations that have been run.
+ */
+#define BENCHMARK_PARAM_MULTI(name, param) \
+ BENCHMARK_NAMED_PARAM_MULTI(name, param, param)
+
/*
* Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
* parameter, rather than using the parameter value.
BENCHMARK_IMPL( \
FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
+ iters, \
unsigned, \
iters) { \
name(iters, ## __VA_ARGS__); \
}
+/**
+ * Same as BENCHMARK_NAMED_PARAM, but allows one to return the actual number
+ * of iterations that have been run.
+ */
+#define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...) \
+ BENCHMARK_MULTI_IMPL( \
+ FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
+ FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
+ unsigned, \
+ iters) { \
+ return name(iters, ## __VA_ARGS__); \
+ }
+
/**
* Just like BENCHMARK, but prints the time relative to a
* baseline. The baseline is the most recent BENCHMARK() seen in
- * lexical order. Example:
+ * the current scope. Example:
*
* // This is the baseline
* BENCHMARK(insertVectorBegin, n) {
*/
#define BENCHMARK_RELATIVE(name, ...) \
BENCHMARK_IMPL( \
+ name, \
+ "%" FB_STRINGIZE(name), \
+ FB_ARG_2_OR_1(1, ## __VA_ARGS__), \
+ FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
+ __VA_ARGS__)
+
+/**
+ * Same as BENCHMARK_RELATIVE, but allows one to return the actual number
+ * of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_MULTI(name, ...) \
+ BENCHMARK_MULTI_IMPL( \
name, \
"%" FB_STRINGIZE(name), \
FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
#define BENCHMARK_RELATIVE_PARAM(name, param) \
BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
+/**
+ * Same as BENCHMARK_RELATIVE_PARAM, but allows one to return the actual
+ * number of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_PARAM_MULTI(name, param) \
+ BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param, param)
+
/**
* A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
*/
BENCHMARK_IMPL( \
FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
"%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
+ iters, \
unsigned, \
iters) { \
name(iters, ## __VA_ARGS__); \
}
+/**
+ * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows one to return the
+ * actual number of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...) \
+ BENCHMARK_MULTI_IMPL( \
+ FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
+ "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
+ unsigned, \
+ iters) { \
+ return name(iters, ## __VA_ARGS__); \
+ }
+
/**
* Draws a line of dashes.
*/
-#define BENCHMARK_DRAW_LINE() \
- static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
- ::folly::addBenchmark(__FILE__, "-", []() { }), \
+#define BENCHMARK_DRAW_LINE() \
+ static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
+ ::folly::addBenchmark(__FILE__, "-", []() -> unsigned { return 0; }), \
true);
/**
if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) = \
::folly::BenchmarkSuspender()) {} \
else
-
-#endif // FOLLY_BENCHMARK_H_