X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FBenchmark.cpp;h=2e9a7ef48cae09f63c3ac0735c316f1c64fc1bcf;hb=b22d1721ed9c062e22b33936e13b39ed1507391a;hp=6c481335a63eaee06a1129873b8f3a68c865e4de;hpb=a83ba29c59e69de4c7c834960dc65bb471de9f25;p=folly.git

diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp
index 6c481335..2e9a7ef4 100644
--- a/folly/Benchmark.cpp
+++ b/folly/Benchmark.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2016 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,23 +17,29 @@
 // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
 
 #include <folly/Benchmark.h>
-#include <folly/Foreach.h>
-#include <folly/json.h>
-#include <folly/String.h>
 
 #include <algorithm>
-#include <boost/regex.hpp>
 #include <cmath>
+#include <cstring>
 #include <iostream>
 #include <limits>
+#include <map>
+#include <memory>
 #include <utility>
 #include <vector>
-#include <cstring>
+
+#include <boost/regex.hpp>
+
+#include <folly/MapUtil.h>
+#include <folly/String.h>
+#include <folly/container/Foreach.h>
+#include <folly/json.h>
 
 using namespace std;
 
 DEFINE_bool(benchmark, false, "Run benchmarks.");
 DEFINE_bool(json, false, "Output in JSON format.");
+DEFINE_bool(json_verbose, false, "Output in verbose JSON format.");
 
 DEFINE_string(
     bm_regex,
@@ -52,7 +58,7 @@ DEFINE_int32(
 
 DEFINE_int64(
     bm_max_iters,
-    1L << 30L,
+    1 << 30,
     "Maximum # of iterations we'll try for each benchmark.");
 
 DEFINE_int32(
@@ -62,13 +68,12 @@ DEFINE_int32(
 
 namespace folly {
 
-BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
+std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
 
 typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
 
-
-vector<tuple<string, string, BenchmarkFun>>& benchmarks() {
-  static vector<tuple<string, string, BenchmarkFun>> _benchmarks;
+vector<detail::BenchmarkRegistration>& benchmarks() {
+  static vector<detail::BenchmarkRegistration> _benchmarks;
   return _benchmarks;
 }
 
@@ -87,12 +92,11 @@ BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) {
 size_t getGlobalBenchmarkBaselineIndex() {
   const char *global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);
   auto it = std::find_if(
-    benchmarks().begin(),
-    benchmarks().end(),
-    [global](const tuple<string, string, BenchmarkFun> &v) {
-      return get<1>(v) == global;
-    }
-  );
+      benchmarks().begin(),
+      benchmarks().end(),
+      [global](const detail::BenchmarkRegistration& v) {
+        return v.name == global;
+      });
   CHECK(it != benchmarks().end());
   return size_t(std::distance(benchmarks().begin(), it));
 }
@@ -102,41 +106,7 @@ size_t getGlobalBenchmarkBaselineIndex() {
 
 void detail::addBenchmarkImpl(const char* file, const char* name,
                               BenchmarkFun fun) {
-  benchmarks().emplace_back(file, name, std::move(fun));
-}
-
-/**
- * Given a point, gives density at that point as a number 0.0 < x <=
- * 1.0. The result is 1.0 if all samples are equal to where, and
- * decreases near 0 if all points are far away from it. The density is
- * computed with the help of a radial basis function.
- */
-static double density(const double * begin, const double *const end,
-                      const double where, const double bandwidth) {
-  assert(begin < end);
-  assert(bandwidth > 0.0);
-  double sum = 0.0;
-  FOR_EACH_RANGE (i, begin, end) {
-    auto d = (*i - where) / bandwidth;
-    sum += exp(- d * d);
-  }
-  return sum / (end - begin);
-}
-
-/**
- * Computes mean and variance for a bunch of data points. Note that
- * mean is currently not being used.
- */
-static pair<double, double>
-meanVariance(const double * begin, const double *const end) {
-  assert(begin < end);
-  double sum = 0.0, sum2 = 0.0;
-  FOR_EACH_RANGE (i, begin, end) {
-    sum += *i;
-    sum2 += *i * *i;
-  }
-  auto const n = end - begin;
-  return make_pair(sum / n, sqrt((sum2 - sum * sum / n) / n));
+  benchmarks().push_back({file, name, std::move(fun)});
 }
 
 /**
@@ -152,54 +122,52 @@ static double estimateTime(double * begin, double * end) {
 
 static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
                                             const double globalBaseline) {
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::microseconds;
+  using std::chrono::nanoseconds;
+  using std::chrono::seconds;
+
   // They key here is accuracy; too low numbers means the accuracy was
   // coarse. We up the ante until we get to at least minNanoseconds
   // timings.
-  static uint64_t resolutionInNs = 0;
-  if (!resolutionInNs) {
-    timespec ts;
-    CHECK_EQ(0, clock_getres(CLOCK_REALTIME, &ts));
-    CHECK_EQ(0, ts.tv_sec) << "Clock sucks.";
-    CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good.";
-    CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
-    resolutionInNs = ts.tv_nsec;
-  }
+  static_assert(
+      std::is_same<high_resolution_clock::duration, nanoseconds>::value,
+      "High resolution clock must be nanosecond resolution.");
   // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
   // the clock resolution is worse than that, it will be larger. In
   // essence we're aiming at making the quantization noise 0.01%.
-  static const auto minNanoseconds =
-    max<uint64_t>(FLAGS_bm_min_usec * 1000UL,
-        min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
+  static const auto minNanoseconds = std::max<nanoseconds>(
+      nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
 
   // We do measurements in several epochs and take the minimum, to
   // account for jitter.
   static const unsigned int epochs = 1000;
   // We establish a total time budget as we don't want a measurement
   // to take too long. This will curtail the number of actual epochs.
-  const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL;
-  timespec global;
-  CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
+  const auto timeBudget = seconds(FLAGS_bm_max_secs);
+  auto global = high_resolution_clock::now();
 
   double epochResults[epochs] = { 0 };
   size_t actualEpochs = 0;
 
   for (; actualEpochs < epochs; ++actualEpochs) {
-    const auto maxIters = FLAGS_bm_max_iters;
-    for (unsigned int n = FLAGS_bm_min_iters; n < maxIters; n *= 2) {
-      auto const nsecsAndIter = fun(n);
+    const auto maxIters = uint32_t(FLAGS_bm_max_iters);
+    for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= 2) {
+      auto const nsecsAndIter = fun(static_cast<unsigned int>(n));
       if (nsecsAndIter.first < minNanoseconds) {
         continue;
       }
       // We got an accurate enough timing, done. But only save if
       // smaller than the current result.
-      epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
-                                       nsecsAndIter.second - globalBaseline);
+      auto nsecs = duration_cast<nanoseconds>(nsecsAndIter.first).count();
+      epochResults[actualEpochs] =
+          max(0.0, double(nsecs) / nsecsAndIter.second - globalBaseline);
       // Done with the current epoch, we got a meaningful timing.
       break;
     }
-    timespec now;
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &now));
-    if (detail::timespecDiff(now, global) >= timeBudgetInNs) {
+    auto now = high_resolution_clock::now();
+    if (now - global >= timeBudget) {
       // No more time budget available.
       ++actualEpochs;
       break;
@@ -277,14 +245,14 @@ static string metricReadable(double n, unsigned int decimals) {
 }
 
 static void printBenchmarkResultsAsTable(
-  const vector<tuple<string, string, double> >& data) {
+    const vector<detail::BenchmarkResult>& data) {
   // Width available
   static const unsigned int columns = 76;
 
   // Compute the longest benchmark name
   size_t longestName = 0;
-  FOR_EACH_RANGE (i, 1, benchmarks().size()) {
-    longestName = max(longestName, get<1>(benchmarks()[i]).size());
+  for (auto& bm : benchmarks()) {
+    longestName = max(longestName, bm.name.size());
   }
 
   // Print a horizontal rule
@@ -304,14 +272,14 @@ static void printBenchmarkResultsAsTable(
   string lastFile;
 
   for (auto& datum : data) {
-    auto file = get<0>(datum);
+    auto file = datum.file;
     if (file != lastFile) {
       // New file starting
       header(file);
       lastFile = file;
     }
 
-    string s = get<1>(datum);
+    string s = datum.name;
     if (s == "-") {
       separator('-');
       continue;
@@ -321,11 +289,11 @@ static void printBenchmarkResultsAsTable(
       s.erase(0, 1);
       useBaseline = true;
     } else {
-      baselineNsPerIter = get<2>(datum);
+      baselineNsPerIter = datum.timeInNs;
       useBaseline = false;
     }
     s.resize(columns - 29, ' ');
-    auto nsPerIter = get<2>(datum);
+    auto nsPerIter = datum.timeInNs;
     auto secPerIter = nsPerIter / 1E9;
     auto itersPerSec = (secPerIter == 0)
                            ? std::numeric_limits<double>::infinity()
@@ -350,34 +318,141 @@ static void printBenchmarkResultsAsTable(
 }
 
 static void printBenchmarkResultsAsJson(
-  const vector<tuple<string, string, double> >& data) {
+    const vector<detail::BenchmarkResult>& data) {
   dynamic d = dynamic::object;
   for (auto& datum: data) {
-    d[std::get<1>(datum)] = std::get<2>(datum) * 1000.;
+    d[datum.name] = datum.timeInNs * 1000.;
   }
 
   printf("%s\n", toPrettyJson(d).c_str());
 }
 
-static void printBenchmarkResults(
-  const vector<tuple<string, string, double> >& data) {
+static void printBenchmarkResultsAsVerboseJson(
+    const vector<detail::BenchmarkResult>& data) {
+  dynamic d;
+  benchmarkResultsToDynamic(data, d);
+  printf("%s\n", toPrettyJson(d).c_str());
+}
 
-  if (FLAGS_json) {
+static void printBenchmarkResults(const vector<detail::BenchmarkResult>& data) {
+  if (FLAGS_json_verbose) {
+    printBenchmarkResultsAsVerboseJson(data);
+  } else if (FLAGS_json) {
     printBenchmarkResultsAsJson(data);
   } else {
     printBenchmarkResultsAsTable(data);
   }
 }
 
+void benchmarkResultsToDynamic(
+    const vector<detail::BenchmarkResult>& data,
+    dynamic& out) {
+  out = dynamic::array;
+  for (auto& datum : data) {
+    out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
+  }
+}
+
+void benchmarkResultsFromDynamic(
+    const dynamic& d,
+    vector<detail::BenchmarkResult>& results) {
+  for (auto& datum : d) {
+    results.push_back(
+        {datum[0].asString(), datum[1].asString(), datum[2].asDouble()});
+  }
+}
+
+static pair<StringPiece, StringPiece> resultKey(
+    const detail::BenchmarkResult& result) {
+  return pair<StringPiece, StringPiece>(result.file, result.name);
+}
+
+void printResultComparison(
+    const vector<detail::BenchmarkResult>& base,
+    const vector<detail::BenchmarkResult>& test) {
+  map<pair<StringPiece, StringPiece>, double> baselines;
+
+  for (auto& baseResult : base) {
+    baselines[resultKey(baseResult)] = baseResult.timeInNs;
+  }
+  //
+  // Width available
+  static const unsigned int columns = 76;
+
+  // Compute the longest benchmark name
+  size_t longestName = 0;
+  for (auto& datum : test) {
+    longestName = max(longestName, datum.name.size());
+  }
+
+  // Print a horizontal rule
+  auto separator = [&](char pad) { puts(string(columns, pad).c_str()); };
+
+  // Print header for a file
+  auto header = [&](const string& file) {
+    separator('=');
+    printf("%-*srelative  time/iter  iters/s\n", columns - 28, file.c_str());
+    separator('=');
+  };
+
+  string lastFile;
+
+  for (auto& datum : test) {
+    folly::Optional<double> baseline =
+        folly::get_optional(baselines, resultKey(datum));
+    auto file = datum.file;
+    if (file != lastFile) {
+      // New file starting
+      header(file);
+      lastFile = file;
+    }
+
+    string s = datum.name;
+    if (s == "-") {
+      separator('-');
+      continue;
+    }
+    if (s[0] == '%') {
+      s.erase(0, 1);
+    }
+    s.resize(columns - 29, ' ');
+    auto nsPerIter = datum.timeInNs;
+    auto secPerIter = nsPerIter / 1E9;
+    auto itersPerSec = (secPerIter == 0)
+        ? std::numeric_limits<double>::infinity()
+        : (1 / secPerIter);
+    if (!baseline) {
+      // Print without baseline
+      printf(
+          "%*s           %9s  %7s\n",
+          static_cast<int>(s.size()),
+          s.c_str(),
+          readableTime(secPerIter, 2).c_str(),
+          metricReadable(itersPerSec, 2).c_str());
+    } else {
+      // Print with baseline
+      auto rel = *baseline / nsPerIter * 100.0;
+      printf(
+          "%*s %7.2f%%  %9s  %7s\n",
+          static_cast<int>(s.size()),
+          s.c_str(),
+          rel,
+          readableTime(secPerIter, 2).c_str(),
+          metricReadable(itersPerSec, 2).c_str());
+    }
+  }
+  separator('=');
+}
+
 void runBenchmarks() {
   CHECK(!benchmarks().empty());
 
-  vector<tuple<string, string, double>> results;
+  vector<detail::BenchmarkResult> results;
   results.reserve(benchmarks().size() - 1);
 
   std::unique_ptr<boost::regex> bmRegex;
   if (!FLAGS_bm_regex.empty()) {
-    bmRegex.reset(new boost::regex(FLAGS_bm_regex));
+    bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
   }
 
   // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
@@ -385,21 +460,20 @@ void runBenchmarks() {
   size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
 
   auto const globalBaseline =
-      runBenchmarkGetNSPerIteration(get<2>(benchmarks()[baselineIndex]), 0);
+      runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, 0);
   FOR_EACH_RANGE (i, 0, benchmarks().size()) {
     if (i == baselineIndex) {
       continue;
     }
     double elapsed = 0.0;
-    if (get<1>(benchmarks()[i]) != "-") { // skip separators
-      if (bmRegex && !boost::regex_search(get<1>(benchmarks()[i]), *bmRegex)) {
+    auto& bm = benchmarks()[i];
+    if (bm.name != "-") { // skip separators
+      if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
         continue;
       }
-      elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks()[i]),
-                                              globalBaseline);
+      elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline);
     }
-    results.emplace_back(get<0>(benchmarks()[i]),
-                         get<1>(benchmarks()[i]), elapsed);
+    results.push_back({bm.file, bm.name, elapsed});
   }
 
   // PLEASE MAKE NOISE. MEASUREMENTS DONE.