X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FBenchmark.cpp;h=1b32e9ef3c479f7b81b6a20f923c801ddd589be5;hb=61a96501d8da914c23c4d6d0b9ca49a18f58bea0;hp=171a0e02d42dfbc0fcfbb61402d9f79731248203;hpb=649cb97d1514634985fa4af370ce30adb79dd032;p=folly.git diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp index 171a0e02..1b32e9ef 100644 --- a/folly/Benchmark.cpp +++ b/folly/Benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2014 Facebook, Inc. + * Copyright 2016 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) -#include "Benchmark.h" -#include "Foreach.h" -#include "json.h" -#include "String.h" +#include +#include +#include +#include #include #include @@ -28,6 +28,7 @@ #include #include #include +#include using namespace std; @@ -51,17 +52,45 @@ namespace folly { BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent; -typedef function BenchmarkFun; -static vector> benchmarks; +typedef function BenchmarkFun; + + +vector>& benchmarks() { + static vector> _benchmarks; + return _benchmarks; +} + +#define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline +#define FB_STRINGIZE_X2(x) FB_STRINGIZE(x) // Add the global baseline -BENCHMARK(globalBenchmarkBaseline) { +BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) { +#ifdef _MSC_VER + _ReadWriteBarrier(); +#else asm volatile(""); +#endif } +int getGlobalBenchmarkBaselineIndex() { + const char *global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE); + auto it = std::find_if( + benchmarks().begin(), + benchmarks().end(), + [global](const tuple &v) { + return get<1>(v) == global; + } + ); + CHECK(it != benchmarks().end()); + return it - benchmarks().begin(); +} + +#undef FB_STRINGIZE_X2 +#undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE + void detail::addBenchmarkImpl(const char* file, const char* name, BenchmarkFun fun) { - benchmarks.emplace_back(file, name, std::move(fun)); + benchmarks().emplace_back(file, name, std::move(fun)); } /** @@ -197,7 +226,7 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, // They key here is accuracy; too low numbers means the accuracy was // coarse. We up the ante until we get to at least minNanoseconds // timings. - static uint64_t resolutionInNs = 0, coarseResolutionInNs = 0; + static uint64_t resolutionInNs = 0; if (!resolutionInNs) { timespec ts; CHECK_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts)); @@ -210,7 +239,7 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, // the clock resolution is worse than that, it will be larger. In // essence we're aiming at making the quantization noise 0.01%. static const auto minNanoseconds = - max(FLAGS_bm_min_usec * 1000UL, + max(FLAGS_bm_min_usec * 1000UL, min(resolutionInNs * 100000, 1000000000ULL)); // We do measurements in several epochs and take the minimum, to @@ -218,7 +247,7 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, static const unsigned int epochs = 1000; // We establish a total time budget as we don't want a measurement // to take too long. This will curtail the number of actual epochs. - const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000; + const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL; timespec global; CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global)); @@ -227,13 +256,14 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, for (; actualEpochs < epochs; ++actualEpochs) { for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) { - auto const nsecs = fun(n); - if (nsecs < minNanoseconds) { + auto const nsecsAndIter = fun(n); + if (nsecsAndIter.first < minNanoseconds) { continue; } // We got an accurate enough timing, done. But only save if // smaller than the current result. - epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline); + epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) / + nsecsAndIter.second - globalBaseline); // Done with the current epoch, we got a meaningful timing. break; } @@ -267,7 +297,7 @@ static const ScaleInfo kTimeSuffixes[] { { 1E-9, "ns" }, { 1E-12, "ps" }, { 1E-15, "fs" }, - { 0, NULL }, + { 0, nullptr }, }; static const ScaleInfo kMetricSuffixes[] { @@ -289,7 +319,7 @@ static const ScaleInfo kMetricSuffixes[] { { 1E-18, "a" }, // atto { 1E-21, "z" }, // zepto { 1E-24, "y" }, // yocto - { 0, NULL }, + { 0, nullptr }, }; static string humanReadable(double n, unsigned int decimals, @@ -300,7 +330,7 @@ static string humanReadable(double n, unsigned int decimals, const double absValue = fabs(n); const ScaleInfo* scale = scales; - while (absValue < scale[0].boundary && scale[1].suffix != NULL) { + while (absValue < scale[0].boundary && scale[1].suffix != nullptr) { ++scale; } @@ -317,14 +347,14 @@ static string metricReadable(double n, unsigned int decimals) { } static void printBenchmarkResultsAsTable( - const vector >& data) { + const vector >& data) { // Width available - static const uint columns = 76; + static const unsigned int columns = 76; // Compute the longest benchmark name size_t longestName = 0; - FOR_EACH_RANGE (i, 1, benchmarks.size()) { - longestName = max(longestName, strlen(get<1>(benchmarks[i]))); + FOR_EACH_RANGE (i, 1, benchmarks().size()) { + longestName = max(longestName, get<1>(benchmarks()[i]).size()); } // Print a horizontal rule @@ -333,19 +363,19 @@ static void printBenchmarkResultsAsTable( }; // Print header for a file - auto header = [&](const char* file) { + auto header = [&](const string& file) { separator('='); printf("%-*srelative time/iter iters/s\n", - columns - 28, file); + columns - 28, file.c_str()); separator('='); }; double baselineNsPerIter = numeric_limits::max(); - const char* lastFile = ""; + string lastFile; for (auto& datum : data) { auto file = get<0>(datum); - if (strcmp(file, lastFile)) { + if (file != lastFile) { // New file starting header(file); lastFile = file; @@ -367,7 +397,9 @@ static void printBenchmarkResultsAsTable( s.resize(columns - 29, ' '); auto nsPerIter = get<2>(datum); auto secPerIter = nsPerIter / 1E9; - auto itersPerSec = 1 / secPerIter; + auto itersPerSec = (secPerIter == 0) + ? std::numeric_limits::infinity() + : (1 / secPerIter); if (!useBaseline) { // Print without baseline printf("%*s %9s %7s\n", @@ -388,7 +420,7 @@ static void printBenchmarkResultsAsTable( } static void printBenchmarkResultsAsJson( - const vector >& data) { + const vector >& data) { dynamic d = dynamic::object; for (auto& datum: data) { d[std::get<1>(datum)] = std::get<2>(datum) * 1000.; @@ -398,7 +430,7 @@ static void printBenchmarkResultsAsJson( } static void printBenchmarkResults( - const vector >& data) { + const vector >& data) { if (FLAGS_json) { printBenchmarkResultsAsJson(data); @@ -408,10 +440,10 @@ static void printBenchmarkResults( } void runBenchmarks() { - CHECK(!benchmarks.empty()); + CHECK(!benchmarks().empty()); - vector> results; - results.reserve(benchmarks.size() - 1); + vector> results; + results.reserve(benchmarks().size() - 1); std::unique_ptr bmRegex; if (!FLAGS_bm_regex.empty()) { @@ -420,19 +452,24 @@ void runBenchmarks() { // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS. - auto const globalBaseline = runBenchmarkGetNSPerIteration( - get<2>(benchmarks.front()), 0); - FOR_EACH_RANGE (i, 1, benchmarks.size()) { + unsigned int baselineIndex = getGlobalBenchmarkBaselineIndex(); + + auto const globalBaseline = + runBenchmarkGetNSPerIteration(get<2>(benchmarks()[baselineIndex]), 0); + FOR_EACH_RANGE (i, 0, benchmarks().size()) { + if (i == baselineIndex) { + continue; + } double elapsed = 0.0; - if (strcmp(get<1>(benchmarks[i]), "-") != 0) { // skip separators - if (bmRegex && !boost::regex_search(get<1>(benchmarks[i]), *bmRegex)) { + if (get<1>(benchmarks()[i]) != "-") { // skip separators + if (bmRegex && !boost::regex_search(get<1>(benchmarks()[i]), *bmRegex)) { continue; } - elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]), + elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks()[i]), globalBaseline); } - results.emplace_back(get<0>(benchmarks[i]), - get<1>(benchmarks[i]), elapsed); + results.emplace_back(get<0>(benchmarks()[i]), + get<1>(benchmarks()[i]), elapsed); } // PLEASE MAKE NOISE. MEASUREMENTS DONE.