/*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_BENCHMARK_H_
-#define FOLLY_BENCHMARK_H_
+#pragma once
#include <folly/Portability.h>
#include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE
+#include <folly/ScopeGuard.h>
+#include <folly/Traits.h>
+#include <folly/portability/GFlags.h>
+#include <folly/portability/Time.h>
+
#include <cassert>
#include <ctime>
#include <boost/function_types/function_arity.hpp>
#include <functional>
#include <glog/logging.h>
-#include <gflags/gflags.h>
#include <limits>
+#include <type_traits>
DECLARE_bool(benchmark);
namespace detail {
-/**
- * This is the clock ID used for measuring time. On older kernels, the
- * resolution of this clock will be very coarse, which will cause the
- * benchmarks to fail.
- */
-enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
-
typedef std::pair<uint64_t, unsigned int> TimeIterPair;
/**
assert(end.tv_nsec >= start.tv_nsec);
return end.tv_nsec - start.tv_nsec;
}
- assert(end.tv_sec > start.tv_sec &&
- (uint64_t)(end.tv_sec - start.tv_sec) <
+ assert(end.tv_sec > start.tv_sec);
+ auto diff = uint64_t(end.tv_sec - start.tv_sec);
+ assert(diff <
std::numeric_limits<uint64_t>::max() / 1000000000UL);
- return (end.tv_sec - start.tv_sec) * 1000000000UL
+ return diff * 1000000000UL
+ end.tv_nsec - start.tv_nsec;
}
*/
struct BenchmarkSuspender {
BenchmarkSuspender() {
- CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
+ CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &start)); // stamp `start` with the current CLOCK_REALTIME reading; clock_gettime must return 0
}
BenchmarkSuspender(const BenchmarkSuspender &) = delete;
void rehire() {
assert(start.tv_nsec == 0 || start.tv_sec == 0);
- CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
+ CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &start)); // re-stamp `start` with the current CLOCK_REALTIME reading; clock_gettime must return 0
+ } // NOTE(review): the assert in rehire() passes when either field of `start` is zero; confirm `||` rather than `&&` is intended
+ // dismissing(f): calls dismiss(), then invokes f() and returns its result; rehire() is queued via SCOPE_EXIT before either happens.
+ template <class F>
+ auto dismissing(F f) -> typename std::result_of<F()>::type {
+ SCOPE_EXIT { rehire(); }; // deferred: runs when this scope is left (SCOPE_EXIT comes from folly/ScopeGuard.h)
+ dismiss();
+ return f(); // return type is whatever F() yields, per std::result_of
}
/**
private:
void tally() {
timespec end;
- CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
+ CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &end)); // read the current time; the next two lines add timespecDiff(end, start) to nsSpent and make `end` the new `start`
nsSpent += detail::timespecDiff(end, start);
start = end;
}
unsigned int niter;
// CORE MEASUREMENT STARTS
- auto const r1 = clock_gettime(detail::DEFAULT_CLOCK_ID, &start);
+ auto const r1 = clock_gettime(CLOCK_REALTIME, &start);
niter = lambda(times);
- auto const r2 = clock_gettime(detail::DEFAULT_CLOCK_ID, &end);
+ auto const r2 = clock_gettime(CLOCK_REALTIME, &end);
// CORE MEASUREMENT ENDS
CHECK_EQ(0, r1);
}
/**
- * Call doNotOptimizeAway(var) against variables that you use for
+ * Call doNotOptimizeAway(var) to ensure that var will be computed even
+ * post-optimization. Use it for variables that are computed during
* benchmarking but otherwise are useless. The compiler tends to do a
- * good job at eliminating unused variables, and this function fools
- * it into thinking var is in fact needed.
+ * good job at eliminating unused variables, and this function fools it
+ * into thinking var is in fact needed.
+ *
+ * Call makeUnpredictable(var) when you don't want the optimizer to use
+ * its knowledge of var to shape the following code. This is useful
+ * when constant propagation or strength reduction is possible during your
+ * benchmark but not in real use cases.
*/
+// _MSC_VER path: no inline asm is used; an empty function defined while optimizations are switched off serves as the sink.
#ifdef _MSC_VER
#pragma optimize("", off)
+inline void doNotOptimizeDependencySink(const void*) {} // deliberately empty; the unnamed pointer parameter is never read
+
+#pragma optimize("", on)
+// _MSC_VER doNotOptimizeAway: passes the address of datum to doNotOptimizeDependencySink (the old body self-assigned datum instead).
template <class T>
-void doNotOptimizeAway(T&& datum) {
- datum = datum;
+void doNotOptimizeAway(const T& datum) {
+ doNotOptimizeDependencySink(&datum);
}
-#pragma optimize("", on)
+template <typename T>
+void makeUnpredictable(T& datum) {
+ doNotOptimizeDependencySink(&datum); // _MSC_VER makeUnpredictable: same sink call as doNotOptimizeAway, but datum is taken by non-const reference
+}
#else
-template <class T>
-void doNotOptimizeAway(T&& datum) {
- asm volatile("" : "+r" (datum));
+// Trait consulted by the enable_if overloads that follow: it selects between the direct-operand asm and the memory-operand asm.
+namespace detail {
+template <typename T>
+struct DoNotOptimizeAwayNeedsIndirect {
+ using Decayed = typename std::decay<T>::type;
+ // `value` is true when any one of the three tests below holds.
+ // The first two tests catch types that cannot be an "r" (register) operand.
+ // The std::is_pointer test is there because callers seem to expect that
+ // doNotOptimizeAway(&x) is equivalent to doNotOptimizeAway(x).
+ constexpr static bool value = !folly::IsTriviallyCopyable<Decayed>::value ||
+ sizeof(Decayed) > sizeof(long) || std::is_pointer<Decayed>::value;
+};
+} // namespace detail
+// doNotOptimizeAway overload enabled when DoNotOptimizeAwayNeedsIndirect<T>::value is false: datum is an input operand of an empty volatile asm.
+template <typename T>
+auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
+ !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+ asm volatile("" ::"X"(datum)); // NOTE(review): "X" permits any operand kind, whereas the trait comment and makeUnpredictable refer to "r"; confirm "X" is intended
+}
+// doNotOptimizeAway overload enabled when DoNotOptimizeAwayNeedsIndirect<T>::value is true: datum is handed to the empty asm as a memory operand.
+template <typename T>
+auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
+ detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+ asm volatile("" ::"m"(datum) : "memory"); // "m": datum is a memory input; the "memory" clobber declares that the asm may access memory beyond its operands
}
+// makeUnpredictable overload enabled when DoNotOptimizeAwayNeedsIndirect<T>::value is false: datum is a read-write operand of an empty volatile asm.
+template <typename T>
+auto makeUnpredictable(T& datum) -> typename std::enable_if<
+ !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+ asm volatile("" : "+r"(datum)); // "+r": register operand that is both read and written, so the asm is treated as possibly changing datum
+}
+// makeUnpredictable overload enabled when DoNotOptimizeAwayNeedsIndirect<T>::value is true: same asm statement as the memory-operand doNotOptimizeAway.
+template <typename T>
+auto makeUnpredictable(T& datum) -> typename std::enable_if<
+ detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+ asm volatile("" ::"m"(datum) : "memory"); // NOTE(review): datum is listed as an input only; the "may have changed" effect presumably rests on the "memory" clobber -- confirm
+}
+
#endif
} // namespace folly
static unsigned funName(paramType paramName)
/**
- * Introduces a benchmark function. Use with either one one or two
- * arguments. The first is the name of the benchmark. Use something
- * descriptive, such as insertVectorBegin. The second argument may be
- * missing, or could be a symbolic counter. The counter dictates how
- * many internal iteration the benchmark does. Example:
+ * Introduces a benchmark function. Use with either one or two arguments.
+ * The first is the name of the benchmark. Use something descriptive, such
+ * as insertVectorBegin. The second argument may be missing, or could be a
+ * symbolic counter. The counter dictates how many internal iterations the
+ * benchmark does. Example:
*
* BENCHMARK(vectorPushBack) {
* vector<int> v;
BENCHMARK_NAMED_PARAM(name, param, param)
/**
- * Same as BENCHMARK_PARAM, but allows to return the actual number of
+ * Same as BENCHMARK_PARAM, but allows one to return the actual number of
* iterations that have been run.
*/
#define BENCHMARK_PARAM_MULTI(name, param) \
}
/**
- * Same as BENCHMARK_NAMED_PARAM, but allows to return the actual number
+ * Same as BENCHMARK_NAMED_PARAM, but allows one to return the actual number
* of iterations that have been run.
*/
#define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...) \
/**
* Just like BENCHMARK, but prints the time relative to a
* baseline. The baseline is the most recent BENCHMARK() seen in
- * lexical order. Example:
+ * the current scope. Example:
*
* // This is the baseline
* BENCHMARK(insertVectorBegin, n) {
__VA_ARGS__)
/**
- * Same as BENCHMARK_RELATIVE, but allows to return the actual number
+ * Same as BENCHMARK_RELATIVE, but allows one to return the actual number
* of iterations that have been run.
*/
#define BENCHMARK_RELATIVE_MULTI(name, ...) \
BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
/**
- * Same as BENCHMARK_RELATIVE_PARAM, but allows to return the actual
+ * Same as BENCHMARK_RELATIVE_PARAM, but allows one to return the actual
* number of iterations that have been run.
*/
#define BENCHMARK_RELATIVE_PARAM_MULTI(name, param) \
}
/**
- * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows to return the
+ * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows one to return the
* actual number of iterations that have been run.
*/
#define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...) \
if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) = \
::folly::BenchmarkSuspender()) {} \
else
-
-#endif // FOLLY_BENCHMARK_H_