X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FBenchmark.h;h=cf11ca75b20b02ca67da3dfbaa4fdf198d3b14de;hp=d5bd4cca9e67b8d1b65c390fbb4cfeafd797976c;hb=3a7cbbe49433606c07f53a32cca05d7cb221e433;hpb=13692fcfcee59d9ff1f1a3c291ee0ae13cf5d873

diff --git a/folly/Benchmark.h b/folly/Benchmark.h
index d5bd4cca..cf11ca75 100644
--- a/folly/Benchmark.h
+++ b/folly/Benchmark.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,20 +14,24 @@
  * limitations under the License.
  */
 
-#ifndef FOLLY_BENCHMARK_H_
-#define FOLLY_BENCHMARK_H_
+#pragma once
+
+#include
+#include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE
+#include
+#include
+#include
 
-#include "folly/Preprocessor.h" // for FB_ANONYMOUS_VARIABLE
 #include
-#include
-#include
 #include
-#include
-#include
 #include
+#include
 
-DECLARE_bool(benchmark);
+#include
+#include
 
+DECLARE_bool(benchmark);
 
 namespace folly {
 
@@ -49,12 +53,8 @@ inline bool runBenchmarksOnFlag() {
 
 namespace detail {
 
-/**
- * This is the clock ID used for measuring time. On older kernels, the
- * resolution of this clock will be very coarse, which will cause the
- * benchmarks to fail.
- */
-enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
+using TimeIterPair =
+    std::pair<std::chrono::high_resolution_clock::duration, unsigned int>;
 
 /**
  * Adds a benchmark wrapped in a std::function. Only used
@@ -62,46 +62,7 @@ enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
  */
 void addBenchmarkImpl(const char* file,
                       const char* name,
-                      std::function<uint64_t(unsigned int)>);
-
-/**
- * Takes the difference between two timespec values. end is assumed to
- * occur after start.
- */
-inline uint64_t timespecDiff(timespec end, timespec start) {
-  if (end.tv_sec == start.tv_sec) {
-    assert(end.tv_nsec >= start.tv_nsec);
-    return end.tv_nsec - start.tv_nsec;
-  }
-  assert(end.tv_sec > start.tv_sec &&
-         end.tv_sec - start.tv_sec <
-         std::numeric_limits<uint64_t>::max() / 1000000000UL);
-  return (end.tv_sec - start.tv_sec) * 1000000000UL
-         + end.tv_nsec - start.tv_nsec;
-}
-
-/**
- * Takes the difference between two sets of timespec values. The first
- * two come from a high-resolution clock whereas the other two come
- * from a low-resolution clock. The crux of the matter is that
- * high-res values may be bogus as documented in
- * http://linux.die.net/man/3/clock_gettime. The trouble is when the
- * running process migrates from one CPU to another, which is more
- * likely for long-running processes. Therefore we watch for high
- * differences between the two timings.
- *
- * This function is subject to further improvements.
- */
-inline uint64_t timespecDiff(timespec end, timespec start,
-                             timespec endCoarse, timespec startCoarse) {
-  auto fine = timespecDiff(end, start);
-  auto coarse = timespecDiff(endCoarse, startCoarse);
-  if (coarse - fine >= 1000000) {
-    // The fine time is in all likelihood bogus
-    return coarse;
-  }
-  return fine;
-}
+                      std::function<detail::TimeIterPair(unsigned int)>);
 
 } // namespace detail
 
@@ -109,67 +70,75 @@ inline uint64_t timespecDiff(timespec end, timespec start,
  * Supporting type for BENCHMARK_SUSPEND defined below.
  */
 struct BenchmarkSuspender {
+  using Clock = std::chrono::high_resolution_clock;
+  using TimePoint = Clock::time_point;
+  using Duration = Clock::duration;
+
   BenchmarkSuspender() {
-    CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
+    start = Clock::now();
   }
 
   BenchmarkSuspender(const BenchmarkSuspender &) = delete;
-  BenchmarkSuspender(BenchmarkSuspender && rhs) {
+  BenchmarkSuspender(BenchmarkSuspender && rhs) noexcept {
     start = rhs.start;
-    rhs.start.tv_nsec = rhs.start.tv_sec = 0;
+    rhs.start = {};
   }
 
   BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
   BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
-    if (start.tv_nsec > 0 || start.tv_sec > 0) {
+    if (start != TimePoint{}) {
       tally();
     }
     start = rhs.start;
-    rhs.start.tv_nsec = rhs.start.tv_sec = 0;
+    rhs.start = {};
     return *this;
   }
 
   ~BenchmarkSuspender() {
-    if (start.tv_nsec > 0 || start.tv_sec > 0) {
+    if (start != TimePoint{}) {
      tally();
    }
   }
 
   void dismiss() {
-    assert(start.tv_nsec > 0 || start.tv_sec > 0);
+    assert(start != TimePoint{});
     tally();
-    start.tv_nsec = start.tv_sec = 0;
+    start = {};
   }
 
   void rehire() {
-    assert(start.tv_nsec == 0 || start.tv_sec == 0);
-    CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
+    assert(start == TimePoint{});
+    start = Clock::now();
+  }
+
+  template <typename F>
+  auto dismissing(F f) -> typename std::result_of<F()>::type {
+    SCOPE_EXIT { rehire(); };
+    dismiss();
+    return f();
   }
 
   /**
-   * This helps the macro definition. To get around the dangers of
-   * operator bool, returns a pointer to member (which allows no
-   * arithmetic).
+   * This is for use inside of if-conditions, used in BENCHMARK macros.
+   * If-conditions bypass the explicit on operator bool.
    */
-  operator int BenchmarkSuspender::*() const {
-    return nullptr;
+  explicit operator bool() const {
+    return false;
   }
 
   /**
-   * Accumulates nanoseconds spent outside benchmark.
+   * Accumulates time spent outside benchmark.
    */
-  typedef uint64_t NanosecondsSpent;
-  static NanosecondsSpent nsSpent;
+  static Duration timeSpent;
 
-private:
+ private:
   void tally() {
-    timespec end;
-    CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
-    nsSpent += detail::timespecDiff(end, start);
+    auto end = Clock::now();
+    timeSpent += end - start;
     start = end;
   }
 
-  timespec start;
+  TimePoint start;
 };
 
 /**
@@ -186,20 +155,21 @@ typename std::enable_if<
 >::type
 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
   auto execute = [=](unsigned int times) {
-    BenchmarkSuspender::nsSpent = 0;
-    timespec start, end;
+    BenchmarkSuspender::timeSpent = {};
+    unsigned int niter;
 
     // CORE MEASUREMENT STARTS
-    CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
-    lambda(times);
-    CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
+    auto start = std::chrono::high_resolution_clock::now();
+    niter = lambda(times);
+    auto end = std::chrono::high_resolution_clock::now();
     // CORE MEASUREMENT ENDS
 
-    return detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent;
+    return detail::TimeIterPair(
+        (end - start) - BenchmarkSuspender::timeSpent, niter);
   };
 
   detail::addBenchmarkImpl(file, name,
-                           std::function<uint64_t(unsigned int)>(execute));
+                           std::function<detail::TimeIterPair(unsigned int)>(execute));
 }
 
 /**
@@ -215,43 +185,135 @@ typename std::enable_if<
 >::type
 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
   addBenchmark(file, name, [=](unsigned int times) {
+      unsigned int niter = 0;
       while (times-- > 0) {
-        lambda();
+        niter += lambda();
       }
+      return niter;
     });
 }
 
 /**
- * Call doNotOptimizeAway(var) against variables that you use for
+ * Call doNotOptimizeAway(var) to ensure that var will be computed even
+ * post-optimization. Use it for variables that are computed during
  * benchmarking but otherwise are useless. The compiler tends to do a
- * good job at eliminating unused variables, and this function fools
- * it into thinking var is in fact needed.
+ * good job at eliminating unused variables, and this function fools it
+ * into thinking var is in fact needed.
+ *
+ * Call makeUnpredictable(var) when you don't want the optimizer to use
+ * its knowledge of var to shape the following code. This is useful
+ * when constant propagation or power reduction is possible during your
+ * benchmark but not in real use cases.
  */
+
+#ifdef _MSC_VER
+
+#pragma optimize("", off)
+
+inline void doNotOptimizeDependencySink(const void*) {}
+
+#pragma optimize("", on)
+
 template <class T>
-void doNotOptimizeAway(T&& datum) {
-  asm volatile("" : "+r" (datum));
+void doNotOptimizeAway(const T& datum) {
+  doNotOptimizeDependencySink(&datum);
+}
+
+template <class T>
+void makeUnpredictable(T& datum) {
+  doNotOptimizeDependencySink(&datum);
+}
+
+#else
+
+namespace detail {
+template <typename T>
+struct DoNotOptimizeAwayNeedsIndirect {
+  using Decayed = typename std::decay<T>::type;
+
+  // First two constraints ensure it can be an "r" operand.
+  // std::is_pointer check is because callers seem to expect that
+  // doNotOptimizeAway(&x) is equivalent to doNotOptimizeAway(x).
+  constexpr static bool value = !folly::IsTriviallyCopyable<Decayed>::value ||
+      sizeof(Decayed) > sizeof(long) || std::is_pointer<Decayed>::value;
+};
+} // detail namespace
+
+template <typename T>
+auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
+    !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+  // The "r" constraint forces the compiler to make datum available
+  // in a register to the asm block, which means that it must have
+  // computed/loaded it. We use this path for things that are <=
+  // sizeof(long) (they have to fit), trivial (otherwise the compiler
+  // doesn't want to put them in a register), and not a pointer (because
+  // doNotOptimizeAway(&foo) would otherwise be a foot gun that didn't
+  // necessarily compute foo).
+  //
+  // An earlier version of this method had a more permissive input operand
+  // constraint, but that caused unnecessary variation between clang and
+  // gcc benchmarks.
+  asm volatile("" ::"r"(datum));
+}
+
+template <typename T>
+auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
+    detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+  // This version of doNotOptimizeAway tells the compiler that the asm
+  // block will read datum from memory, and that in addition it might read
+  // or write from any memory location. If the memory clobber could be
+  // separated into input and output that would be preferable.
+  asm volatile("" ::"m"(datum) : "memory");
+}
+
+template <typename T>
+auto makeUnpredictable(T& datum) -> typename std::enable_if<
+    !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+  asm volatile("" : "+r"(datum));
 }
 
+template <typename T>
+auto makeUnpredictable(T& datum) -> typename std::enable_if<
+    detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
+  asm volatile("" ::"m"(datum) : "memory");
+}
+
+#endif
+
 } // namespace folly
 
 /**
  * Introduces a benchmark function. Used internally, see BENCHMARK and
  * friends below.
  */
-#define BENCHMARK_IMPL(funName, stringName, paramType, paramName) \
+#define BENCHMARK_IMPL(funName, stringName, rv, paramType, paramName) \
   static void funName(paramType); \
   static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
     ::folly::addBenchmark(__FILE__, stringName, \
-      [](paramType paramName) { funName(paramName); }), \
+      [](paramType paramName) -> unsigned { funName(paramName); \
+                                            return rv; }), \
     true); \
   static void funName(paramType paramName)
 
 /**
- * Introduces a benchmark function. Use with either one one or two
- * arguments. The first is the name of the benchmark. Use something
- * descriptive, such as insertVectorBegin. The second argument may be
- * missing, or could be a symbolic counter. The counter dictates how
- * many internal iteration the benchmark does. Example:
+ * Introduces a benchmark function with support for returning the actual
+ * number of iterations. Used internally, see BENCHMARK_MULTI and friends
+ * below.
+ */
+#define BENCHMARK_MULTI_IMPL(funName, stringName, paramType, paramName) \
+  static unsigned funName(paramType); \
+  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
+    ::folly::addBenchmark(__FILE__, stringName, \
+      [](paramType paramName) { return funName(paramName); }), \
+    true); \
+  static unsigned funName(paramType paramName)
+
+/**
+ * Introduces a benchmark function. Use with either one or two arguments.
+ * The first is the name of the benchmark. Use something descriptive, such
+ * as insertVectorBegin. The second argument may be missing, or could be a
+ * symbolic counter. The counter dictates how many internal iterations the
+ * benchmark does. Example:
  *
 * BENCHMARK(vectorPushBack) {
 *   vector<int> v;
 *   v.push_back(42);
@@ -267,6 +329,29 @@ void doNotOptimizeAway(T&& datum) {
  */
 #define BENCHMARK(name, ...) \
   BENCHMARK_IMPL( \
+    name, \
+    FB_STRINGIZE(name), \
+    FB_ARG_2_OR_1(1, ## __VA_ARGS__), \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
+    __VA_ARGS__)
+
+/**
+ * Like BENCHMARK above, but allows the user to return the actual
+ * number of iterations executed in the function body. This can be
+ * useful if the benchmark function doesn't know upfront how many
+ * iterations it's going to run or if it runs through a certain
+ * number of test cases, e.g.:
+ *
+ * BENCHMARK_MULTI(benchmarkSomething) {
+ *   std::vector<int> testCases { 0, 1, 1, 2, 3, 5 };
+ *   for (int c : testCases) {
+ *     doSomething(c);
+ *   }
+ *   return testCases.size();
+ * }
+ */
+#define BENCHMARK_MULTI(name, ...) \
+  BENCHMARK_MULTI_IMPL( \
     name, \
     FB_STRINGIZE(name), \
     FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
     __VA_ARGS__)
 
@@ -295,18 +380,65 @@ void doNotOptimizeAway(T&& datum) {
  * 1000000 for initialSize, and the iteration count for n.
  */
 #define BENCHMARK_PARAM(name, param) \
+  BENCHMARK_NAMED_PARAM(name, param, param)
+
+/**
+ * Same as BENCHMARK_PARAM, but allows one to return the actual number of
+ * iterations that have been run.
+ */
+#define BENCHMARK_PARAM_MULTI(name, param) \
+  BENCHMARK_NAMED_PARAM_MULTI(name, param, param)
+
+/*
+ * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
+ * parameter, rather than using the parameter value.
+ *
+ * Useful when the parameter value is not a valid token for string pasting,
+ * or when you want to specify multiple parameter arguments.
+ *
+ * For example:
+ *
+ * void addValue(uint n, int64_t bucketSize, int64_t min, int64_t max) {
+ *   Histogram<int64_t> hist(bucketSize, min, max);
+ *   int64_t num = min;
+ *   FOR_EACH_RANGE (i, 0, n) {
+ *     hist.addValue(num);
+ *     ++num;
+ *     if (num > max) { num = min; }
+ *   }
+ * }
+ *
+ * BENCHMARK_NAMED_PARAM(addValue, 0_to_100, 1, 0, 100)
+ * BENCHMARK_NAMED_PARAM(addValue, 0_to_1000, 10, 0, 1000)
+ * BENCHMARK_NAMED_PARAM(addValue, 5k_to_20k, 250, 5000, 20000)
+ */
+#define BENCHMARK_NAMED_PARAM(name, param_name, ...) \
   BENCHMARK_IMPL( \
-      FB_CONCATENATE(name, FB_CONCATENATE(_, param)), \
-      FB_STRINGIZE(name) "(" FB_STRINGIZE(param) ")", \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
+      FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
+      iters, \
       unsigned, \
      iters) { \
-    name(iters, param); \
+    name(iters, ## __VA_ARGS__); \
+  }
+
+/**
+ * Same as BENCHMARK_NAMED_PARAM, but allows one to return the actual number
+ * of iterations that have been run.
+ */
+#define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...) \
+  BENCHMARK_MULTI_IMPL( \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
+      FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
+      unsigned, \
+      iters) { \
+    return name(iters, ## __VA_ARGS__); \
   }
 
 /**
  * Just like BENCHMARK, but prints the time relative to a
  * baseline. The baseline is the most recent BENCHMARK() seen in
- * lexical order. Example:
+ * the current scope. Example:
 *
 * // This is the baseline
 * BENCHMARK(insertVectorBegin, n) {
@@ -329,6 +461,18 @@ void doNotOptimizeAway(T&& datum) {
  */
 #define BENCHMARK_RELATIVE(name, ...) \
   BENCHMARK_IMPL( \
+    name, \
+    "%" FB_STRINGIZE(name), \
+    FB_ARG_2_OR_1(1, ## __VA_ARGS__), \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
+    __VA_ARGS__)
+
+/**
+ * Same as BENCHMARK_RELATIVE, but allows one to return the actual number
+ * of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_MULTI(name, ...) \
+  BENCHMARK_MULTI_IMPL( \
     name, \
     "%" FB_STRINGIZE(name), \
     FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \
     __VA_ARGS__)
 
@@ -338,20 +482,47 @@ void doNotOptimizeAway(T&& datum) {
 /**
  * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM.
  */
 #define BENCHMARK_RELATIVE_PARAM(name, param) \
+  BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
+
+/**
+ * Same as BENCHMARK_RELATIVE_PARAM, but allows one to return the actual
+ * number of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_PARAM_MULTI(name, param) \
+  BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param, param)
+
+/**
+ * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
+ */
+#define BENCHMARK_RELATIVE_NAMED_PARAM(name, param_name, ...) \
   BENCHMARK_IMPL( \
-      FB_CONCATENATE(name, FB_CONCATENATE(_, param)), \
-      "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param) ")", \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
+      "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
+      iters, \
+      unsigned, \
+      iters) { \
+    name(iters, ## __VA_ARGS__); \
+  }
+
+/**
+ * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows one to return the
+ * actual number of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...) \
+  BENCHMARK_MULTI_IMPL( \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
+      "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
       unsigned, \
      iters) { \
-    name(iters, param); \
+    return name(iters, ## __VA_ARGS__); \
   }
 
 /**
  * Draws a line of dashes.
  */
-#define BENCHMARK_DRAW_LINE() \
-  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
-    ::folly::addBenchmark(__FILE__, "-", []() { }), \
+#define BENCHMARK_DRAW_LINE() \
+  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \
+    ::folly::addBenchmark(__FILE__, "-", []() -> unsigned { return 0; }), \
     true);
 
 /**
@@ -372,5 +543,3 @@ void doNotOptimizeAway(T&& datum) {
   if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) = \
       ::folly::BenchmarkSuspender()) {} \
   else
-
-#endif // FOLLY_BENCHMARK_H_
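
For orientation, here is a small caller-side sketch of the macros and helpers this change touches (BENCHMARK, BENCHMARK_RELATIVE, BENCHMARK_SUSPEND, the new BENCHMARK_MULTI, doNotOptimizeAway, and makeUnpredictable). The benchmark bodies and names are made up for illustration; folly::runBenchmarks(), FOR_EACH_RANGE (folly/Foreach.h), and gflags::ParseCommandLineFlags() come from the wider folly and gflags libraries rather than from this diff, so treat that scaffolding as an assumption, not part of the change.

// Illustrative only: not part of folly/Benchmark.h or of this commit.
#include <folly/Benchmark.h>
#include <folly/Foreach.h> // FOR_EACH_RANGE; assumed available alongside Benchmark.h
#include <vector>

// Baseline benchmark: `n` is the iteration count chosen by the framework.
BENCHMARK(vectorPushBack, n) {
  std::vector<int> v;
  FOR_EACH_RANGE (i, 0, n) {
    v.push_back(i);
    folly::doNotOptimizeAway(v.data()); // keep the writes observable
  }
}

// Reported relative to the baseline above; setup runs inside
// BENCHMARK_SUSPEND so only push_back is timed.
BENCHMARK_RELATIVE(vectorPushBackReserved, n) {
  std::vector<int> v;
  BENCHMARK_SUSPEND { v.reserve(n); }
  FOR_EACH_RANGE (i, 0, n) {
    v.push_back(i);
    folly::doNotOptimizeAway(v.data());
  }
}

// New in this change: the body reports how many iterations it actually ran.
BENCHMARK_MULTI(sumTestCases) {
  std::vector<int> testCases{0, 1, 1, 2, 3, 5};
  int sum = 0;
  for (int c : testCases) {
    folly::makeUnpredictable(c); // also new: defeats constant propagation on c
    sum += c;
  }
  folly::doNotOptimizeAway(sum);
  return testCases.size();
}

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true); // hypothetical driver
  folly::runBenchmarks(); // declared elsewhere in Benchmark.h; run with --benchmark
  return 0;
}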