Improve folly::RequestContext onSet and onUnset efficiency

[folly.git] / folly / Benchmark.cpp
diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp

index 93aa2efe3275572b290b0de3e994f8eefa1f8e44..2e9a7ef48cae09f63c3ac0735c316f1c64fc1bcf 100644 (file)
--- a/folly/Benchmark.cpp
+++ b/folly/Benchmark.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -16,104 +16,97 @@
  
  // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
  
-#include "Benchmark.h"
-#include "Foreach.h"
-#include "String.h"
+#include <folly/Benchmark.h>
+
  #include <algorithm>
  #include <cmath>
+#include <cstring>
  #include <iostream>
  #include <limits>
+#include <map>
+#include <memory>
  #include <utility>
  #include <vector>
  
+#include <boost/regex.hpp>
+
+#include <folly/MapUtil.h>
+#include <folly/String.h>
+#include <folly/container/Foreach.h>
+#include <folly/json.h>
+
  using namespace std;
  
  DEFINE_bool(benchmark, false, "Run benchmarks.");
+DEFINE_bool(json, false, "Output in JSON format.");
+DEFINE_bool(json_verbose, false, "Output in verbose JSON format.");
+
+DEFINE_string(
+    bm_regex,
+    "",
+    "Only benchmarks whose names match this regex will be run.");
+
+DEFINE_int64(
+    bm_min_usec,
+    100,
+    "Minimum # of microseconds we'll accept for each benchmark.");
+
+DEFINE_int32(
+    bm_min_iters,
+    1,
+    "Minimum # of iterations we'll try for each benchmark.");
+
+DEFINE_int64(
+    bm_max_iters,
+    1 << 30,
+    "Maximum # of iterations we'll try for each benchmark.");
+
+DEFINE_int32(
+    bm_max_secs,
+    1,
+    "Maximum # of seconds we'll spend on each benchmark.");
  
  namespace folly {
  
-BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
+std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
  
-typedef function<uint64_t(unsigned int)> BenchmarkFun;
-static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks;
+typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
  
-// Add the global baseline
-BENCHMARK(globalBenchmarkBaseline) {
-  asm volatile("");
+vector<detail::BenchmarkRegistration>& benchmarks() {
+  static vector<detail::BenchmarkRegistration> _benchmarks;
+  return _benchmarks;
  }
  
-void detail::addBenchmarkImpl(const char* file, const char* name,
-                              BenchmarkFun fun) {
-  benchmarks.emplace_back(file, name, std::move(fun));
-}
+#define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline
+#define FB_STRINGIZE_X2(x) FB_STRINGIZE(x)
  
-/**
- * Given a point, gives density at that point as a number 0.0 < x <=
- * 1.0. The result is 1.0 if all samples are equal to where, and
- * decreases near 0 if all points are far away from it. The density is
- * computed with the help of a radial basis function.
- */
-static double density(const double * begin, const double *const end,
-                      const double where, const double bandwidth) {
-  assert(begin < end);
-  assert(bandwidth > 0.0);
-  double sum = 0.0;
-  FOR_EACH_RANGE (i, begin, end) {
-    auto d = (*i - where) / bandwidth;
-    sum += exp(- d * d);
-  }
-  return sum / (end - begin);
+// Add the global baseline
+BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) {
+#ifdef _MSC_VER
+  _ReadWriteBarrier();
+#else
+  asm volatile("");
+#endif
  }
  
-/**
- * Computes mean and variance for a bunch of data points. Note that
- * mean is currently not being used.
- */
-static pair<double, double>
-meanVariance(const double * begin, const double *const end) {
-  assert(begin < end);
-  double sum = 0.0, sum2 = 0.0;
-  FOR_EACH_RANGE (i, begin, end) {
-    sum += *i;
-    sum2 += *i * *i;
-  }
-  auto const n = end - begin;
-  return make_pair(sum / n, sqrt((sum2 - sum * sum / n) / n));
+size_t getGlobalBenchmarkBaselineIndex() {
+  const char *global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);
+  auto it = std::find_if(
+      benchmarks().begin(),
+      benchmarks().end(),
+      [global](const detail::BenchmarkRegistration& v) {
+        return v.name == global;
+      });
+  CHECK(it != benchmarks().end());
+  return size_t(std::distance(benchmarks().begin(), it));
  }
  
-/**
- * Computes the mode of a sample set through brute force. Assumes
- * input is sorted.
- */
-static double mode(const double * begin, const double *const end) {
-  assert(begin < end);
-  // Lower bound and upper bound for result and their respective
-  // densities.
-  auto
-    result = 0.0,
-    bestDensity = 0.0;
-
-  // Get the variance so we pass it down to density()
-  auto const sigma = meanVariance(begin, end).second;
-  if (!sigma) {
-    // No variance means constant signal
-    return *begin;
-  }
-
-  FOR_EACH_RANGE (i, begin, end) {
-    assert(i == begin || *i >= i[-1]);
-    auto candidate = density(begin, end, *i, sigma * sqrt(2.0));
-    if (candidate > bestDensity) {
-      // Found a new best
-      bestDensity = candidate;
-      result = *i;
-    } else {
-      // Density is decreasing... we could break here if we definitely
-      // knew this is unimodal.
-    }
-  }
+#undef FB_STRINGIZE_X2
+#undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
  
-  return result;
+void detail::addBenchmarkImpl(const char* file, const char* name,
+                              BenchmarkFun fun) {
+  benchmarks().push_back({file, name, std::move(fun)});
  }
  
  /**
@@ -124,103 +117,57 @@ static double estimateTime(double * begin, double * end) {
  
    // Current state of the art: get the minimum. After some
    // experimentation, it seems taking the minimum is the best.
-
    return *min_element(begin, end);
-
-  // What follows after estimates the time as the mode of the
-  // distribution.
-
-  // Select the awesomest (i.e. most frequent) result. We do this by
-  // sorting and then computing the longest run length.
-  sort(begin, end);
-
-  // Eliminate outliers. A time much larger than the minimum time is
-  // considered an outlier.
-  while (end[-1] > 2.0 * *begin) {
-    --end;
-    if (begin == end) {
-      LOG(INFO) << *begin;
-    }
-    assert(begin < end);
-  }
-
-  double result = 0;
-
-  /* Code used just for comparison purposes */ {
-    unsigned bestFrequency = 0;
-    unsigned candidateFrequency = 1;
-    double candidateValue = *begin;
-    for (auto current = begin + 1; ; ++current) {
-      if (current == end || *current != candidateValue) {
-        // Done with the current run, see if it was best
-        if (candidateFrequency > bestFrequency) {
-          bestFrequency = candidateFrequency;
-          result = candidateValue;
-        }
-        if (current == end) {
-          break;
-        }
-        // Start a new run
-        candidateValue = *current;
-        candidateFrequency = 1;
-      } else {
-        // Cool, inside a run, increase the frequency
-        ++candidateFrequency;
-      }
-    }
-  }
-
-  result = mode(begin, end);
-
-  return result;
  }
  
  static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
                                              const double globalBaseline) {
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::microseconds;
+  using std::chrono::nanoseconds;
+  using std::chrono::seconds;
+
    // They key here is accuracy; too low numbers means the accuracy was
    // coarse. We up the ante until we get to at least minNanoseconds
    // timings.
-  static uint64_t resolutionInNs = 0, coarseResolutionInNs = 0;
-  if (!resolutionInNs) {
-    timespec ts;
-    CHECK_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts));
-    CHECK_EQ(0, ts.tv_sec) << "Clock sucks.";
-    CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good.";
-    CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
-    resolutionInNs = ts.tv_nsec;
-  }
-  // Whe choose a minimum minimum (sic) of 10,000 nanoseconds, but if
+  static_assert(
+      std::is_same<high_resolution_clock::duration, nanoseconds>::value,
+      "High resolution clock must be nanosecond resolution.");
+  // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
    // the clock resolution is worse than that, it will be larger. In
    // essence we're aiming at making the quantization noise 0.01%.
-  static const auto minNanoseconds = min(resolutionInNs * 100000, 1000000000UL);
+  static const auto minNanoseconds = std::max<nanoseconds>(
+      nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
  
    // We do measurements in several epochs and take the minimum, to
    // account for jitter.
    static const unsigned int epochs = 1000;
    // We establish a total time budget as we don't want a measurement
    // to take too long. This will curtail the number of actual epochs.
-  static const uint64_t timeBudgetInNs = 1000000000;
-  timespec global;
-  CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
+  const auto timeBudget = seconds(FLAGS_bm_max_secs);
+  auto global = high_resolution_clock::now();
  
    double epochResults[epochs] = { 0 };
    size_t actualEpochs = 0;
  
    for (; actualEpochs < epochs; ++actualEpochs) {
-    for (unsigned int n = 1; n < (1U << 30); n *= 2) {
-      auto const nsecs = fun(n);
-      if (nsecs < minNanoseconds) {
+    const auto maxIters = uint32_t(FLAGS_bm_max_iters);
+    for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= 2) {
+      auto const nsecsAndIter = fun(static_cast<unsigned int>(n));
+      if (nsecsAndIter.first < minNanoseconds) {
          continue;
        }
        // We got an accurate enough timing, done. But only save if
        // smaller than the current result.
-      epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline);
+      auto nsecs = duration_cast<nanoseconds>(nsecsAndIter.first).count();
+      epochResults[actualEpochs] =
+          max(0.0, double(nsecs) / nsecsAndIter.second - globalBaseline);
        // Done with the current epoch, we got a meaningful timing.
        break;
      }
-    timespec now;
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &now));
-    if (detail::timespecDiff(now, global) >= timeBudgetInNs) {
+    auto now = high_resolution_clock::now();
+    if (now - global >= timeBudget) {
        // No more time budget available.
        ++actualEpochs;
        break;
@@ -232,79 +179,80 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
    return max(0.0, estimateTime(epochResults, epochResults + actualEpochs));
  }
  
-static string humanReadable(double n, unsigned int decimals) {
-  auto a = fabs(n);
-  char suffix = ' ';
-
-  if (a >= 1E21) {
-    // Too big to be comprehended by the puny human brain
-    suffix = '!';
-    n /= 1E21;
-  } else if (a >= 1E18) {
-    // "EXA" written with suffix 'X' so as to not create confusion
-    // with scientific notation.
-    suffix = 'X';
-    n /= 1E18;
-  } else if (a >= 1E15) {
-    // "PETA"
-    suffix = 'P';
-    n /= 1E15;
-  } else if (a >= 1E12) {
-    // "TERA"
-    suffix = 'T';
-    n /= 1E12;
-  } else if (a >= 1E9) {
-    // "GIGA"
-    suffix = 'G';
-    n /= 1E9;
-  } else if (a >= 1E6) {
-    // "MEGA"
-    suffix = 'M';
-    n /= 1E6;
-  } else if (a >= 1E3) {
-    // "KILO"
-    suffix = 'K';
-    n /= 1E3;
-  } else if (a == 0.0) {
-    suffix = ' ';
-  } else if (a < 1E-15) {
-    // too small
-    suffix = '?';
-    n *= 1E18;
-  } else if (a < 1E-12) {
-    // "femto"
-    suffix = 'f';
-    n *= 1E15;
-  } else if (a < 1E-9) {
-    // "pico"
-    suffix = 'p';
-    n *= 1E12;
-  } else if (a < 1E-6) {
-    // "nano"
-    suffix = 'n';
-    n *= 1E9;
-  } else if (a < 1E-3) {
-    // "micro"
-    suffix = 'u';
-    n *= 1E6;
-  } else if (a < 1) {
-    // "mili"
-    suffix = 'm';
-    n *= 1E3;
+struct ScaleInfo {
+  double boundary;
+  const char* suffix;
+};
+
+static const ScaleInfo kTimeSuffixes[] {
+  { 365.25 * 24 * 3600, "years" },
+  { 24 * 3600, "days" },
+  { 3600, "hr" },
+  { 60, "min" },
+  { 1, "s" },
+  { 1E-3, "ms" },
+  { 1E-6, "us" },
+  { 1E-9, "ns" },
+  { 1E-12, "ps" },
+  { 1E-15, "fs" },
+  { 0, nullptr },
+};
+
+static const ScaleInfo kMetricSuffixes[] {
+  { 1E24, "Y" },  // yotta
+  { 1E21, "Z" },  // zetta
+  { 1E18, "X" },  // "exa" written with suffix 'X' so as to not create
+                  //   confusion with scientific notation
+  { 1E15, "P" },  // peta
+  { 1E12, "T" },  // terra
+  { 1E9, "G" },   // giga
+  { 1E6, "M" },   // mega
+  { 1E3, "K" },   // kilo
+  { 1, "" },
+  { 1E-3, "m" },  // milli
+  { 1E-6, "u" },  // micro
+  { 1E-9, "n" },  // nano
+  { 1E-12, "p" }, // pico
+  { 1E-15, "f" }, // femto
+  { 1E-18, "a" }, // atto
+  { 1E-21, "z" }, // zepto
+  { 1E-24, "y" }, // yocto
+  { 0, nullptr },
+};
+
+static string humanReadable(double n, unsigned int decimals,
+                            const ScaleInfo* scales) {
+  if (std::isinf(n) || std::isnan(n)) {
+    return folly::to<string>(n);
    }
  
-  return stringPrintf("%*.*f%c", decimals + 3 + 1, decimals, n, suffix);
+  const double absValue = fabs(n);
+  const ScaleInfo* scale = scales;
+  while (absValue < scale[0].boundary && scale[1].suffix != nullptr) {
+    ++scale;
+  }
+
+  const double scaledValue = n / scale->boundary;
+  return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);
+}
+
+static string readableTime(double n, unsigned int decimals) {
+  return humanReadable(n, decimals, kTimeSuffixes);
  }
  
-static void printBenchmarkResults(
-  const vector<tuple<const char*, const char*, double> >& data) {
+static string metricReadable(double n, unsigned int decimals) {
+  return humanReadable(n, decimals, kMetricSuffixes);
+}
+
+static void printBenchmarkResultsAsTable(
+    const vector<detail::BenchmarkResult>& data) {
    // Width available
-  static const uint columns = 76;
+  static const unsigned int columns = 76;
  
    // Compute the longest benchmark name
    size_t longestName = 0;
-  FOR_EACH_RANGE (i, 1, benchmarks.size()) {
-    longestName = max(longestName, strlen(get<1>(benchmarks[i])));
+  for (auto& bm : benchmarks()) {
+    longestName = max(longestName, bm.name.size());
    }
  
    // Print a horizontal rule
@@ -313,25 +261,25 @@ static void printBenchmarkResults(
    };
  
    // Print header for a file
-  auto header = [&](const char* file) {
+  auto header = [&](const string& file) {
      separator('=');
-    printf("%-*srelative  ns/iter  iters/s\n",
-           columns - 26, file);
+    printf("%-*srelative  time/iter  iters/s\n",
+           columns - 28, file.c_str());
      separator('=');
    };
  
    double baselineNsPerIter = numeric_limits<double>::max();
-  const char* lastFile = "";
+  string lastFile;
  
    for (auto& datum : data) {
-    auto file = get<0>(datum);
-    if (strcmp(file, lastFile)) {
+    auto file = datum.file;
+    if (file != lastFile) {
        // New file starting
        header(file);
        lastFile = file;
      }
  
-    string s = get<1>(datum);
+    string s = datum.name;
      if (s == "-") {
        separator('-');
        continue;
@@ -341,48 +289,191 @@ static void printBenchmarkResults(
        s.erase(0, 1);
        useBaseline = true;
      } else {
-      baselineNsPerIter = get<2>(datum);
+      baselineNsPerIter = datum.timeInNs;
        useBaseline = false;
      }
-    s.resize(columns - 27, ' ');
-    auto nsPerIter = get<2>(datum);
-    auto itersPerSec = 1E9 / nsPerIter;
+    s.resize(columns - 29, ' ');
+    auto nsPerIter = datum.timeInNs;
+    auto secPerIter = nsPerIter / 1E9;
+    auto itersPerSec = (secPerIter == 0)
+                           ? std::numeric_limits<double>::infinity()
+                           : (1 / secPerIter);
      if (!useBaseline) {
        // Print without baseline
-      printf("%*s           %s  %s\n",
+      printf("%*s           %9s  %7s\n",
               static_cast<int>(s.size()), s.c_str(),
-             humanReadable(nsPerIter, 2).c_str(),
-             humanReadable(itersPerSec, 2).c_str());
+             readableTime(secPerIter, 2).c_str(),
+             metricReadable(itersPerSec, 2).c_str());
      } else {
        // Print with baseline
        auto rel = baselineNsPerIter / nsPerIter * 100.0;
-      printf("%*s %7.2f%%  %s  %s\n",
+      printf("%*s %7.2f%%  %9s  %7s\n",
               static_cast<int>(s.size()), s.c_str(),
               rel,
-             humanReadable(nsPerIter, 2).c_str(),
-             humanReadable(itersPerSec, 2).c_str());
+             readableTime(secPerIter, 2).c_str(),
+             metricReadable(itersPerSec, 2).c_str());
+    }
+  }
+  separator('=');
+}
+
+static void printBenchmarkResultsAsJson(
+    const vector<detail::BenchmarkResult>& data) {
+  dynamic d = dynamic::object;
+  for (auto& datum: data) {
+    d[datum.name] = datum.timeInNs * 1000.;
+  }
+
+  printf("%s\n", toPrettyJson(d).c_str());
+}
+
+static void printBenchmarkResultsAsVerboseJson(
+    const vector<detail::BenchmarkResult>& data) {
+  dynamic d;
+  benchmarkResultsToDynamic(data, d);
+  printf("%s\n", toPrettyJson(d).c_str());
+}
+
+static void printBenchmarkResults(const vector<detail::BenchmarkResult>& data) {
+  if (FLAGS_json_verbose) {
+    printBenchmarkResultsAsVerboseJson(data);
+  } else if (FLAGS_json) {
+    printBenchmarkResultsAsJson(data);
+  } else {
+    printBenchmarkResultsAsTable(data);
+  }
+}
+
+void benchmarkResultsToDynamic(
+    const vector<detail::BenchmarkResult>& data,
+    dynamic& out) {
+  out = dynamic::array;
+  for (auto& datum : data) {
+    out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
+  }
+}
+
+void benchmarkResultsFromDynamic(
+    const dynamic& d,
+    vector<detail::BenchmarkResult>& results) {
+  for (auto& datum : d) {
+    results.push_back(
+        {datum[0].asString(), datum[1].asString(), datum[2].asDouble()});
+  }
+}
+
+static pair<StringPiece, StringPiece> resultKey(
+    const detail::BenchmarkResult& result) {
+  return pair<StringPiece, StringPiece>(result.file, result.name);
+}
+
+void printResultComparison(
+    const vector<detail::BenchmarkResult>& base,
+    const vector<detail::BenchmarkResult>& test) {
+  map<pair<StringPiece, StringPiece>, double> baselines;
+
+  for (auto& baseResult : base) {
+    baselines[resultKey(baseResult)] = baseResult.timeInNs;
+  }
+  //
+  // Width available
+  static const unsigned int columns = 76;
+
+  // Compute the longest benchmark name
+  size_t longestName = 0;
+  for (auto& datum : test) {
+    longestName = max(longestName, datum.name.size());
+  }
+
+  // Print a horizontal rule
+  auto separator = [&](char pad) { puts(string(columns, pad).c_str()); };
+
+  // Print header for a file
+  auto header = [&](const string& file) {
+    separator('=');
+    printf("%-*srelative  time/iter  iters/s\n", columns - 28, file.c_str());
+    separator('=');
+  };
+
+  string lastFile;
+
+  for (auto& datum : test) {
+    folly::Optional<double> baseline =
+        folly::get_optional(baselines, resultKey(datum));
+    auto file = datum.file;
+    if (file != lastFile) {
+      // New file starting
+      header(file);
+      lastFile = file;
+    }
+
+    string s = datum.name;
+    if (s == "-") {
+      separator('-');
+      continue;
+    }
+    if (s[0] == '%') {
+      s.erase(0, 1);
+    }
+    s.resize(columns - 29, ' ');
+    auto nsPerIter = datum.timeInNs;
+    auto secPerIter = nsPerIter / 1E9;
+    auto itersPerSec = (secPerIter == 0)
+        ? std::numeric_limits<double>::infinity()
+        : (1 / secPerIter);
+    if (!baseline) {
+      // Print without baseline
+      printf(
+          "%*s           %9s  %7s\n",
+          static_cast<int>(s.size()),
+          s.c_str(),
+          readableTime(secPerIter, 2).c_str(),
+          metricReadable(itersPerSec, 2).c_str());
+    } else {
+      // Print with baseline
+      auto rel = *baseline / nsPerIter * 100.0;
+      printf(
+          "%*s %7.2f%%  %9s  %7s\n",
+          static_cast<int>(s.size()),
+          s.c_str(),
+          rel,
+          readableTime(secPerIter, 2).c_str(),
+          metricReadable(itersPerSec, 2).c_str());
      }
    }
    separator('=');
  }
  
  void runBenchmarks() {
-  CHECK(!benchmarks.empty());
+  CHECK(!benchmarks().empty());
+
+  vector<detail::BenchmarkResult> results;
+  results.reserve(benchmarks().size() - 1);
  
-  vector<tuple<const char*, const char*, double>> results;
-  results.reserve(benchmarks.size() - 1);
+  std::unique_ptr<boost::regex> bmRegex;
+  if (!FLAGS_bm_regex.empty()) {
+    bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
+  }
  
    // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
  
-  auto const globalBaseline = runBenchmarkGetNSPerIteration(
-    get<2>(benchmarks.front()), 0);
-  FOR_EACH_RANGE (i, 1, benchmarks.size()) {
-    auto elapsed = strcmp(get<1>(benchmarks[i]), "-") == 0
-      ? 0.0 // skip the separators
-      : runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]),
-                                      globalBaseline);
-    results.emplace_back(get<0>(benchmarks[i]),
-                         get<1>(benchmarks[i]), elapsed);
+  size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
+
+  auto const globalBaseline =
+      runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, 0);
+  FOR_EACH_RANGE (i, 0, benchmarks().size()) {
+    if (i == baselineIndex) {
+      continue;
+    }
+    double elapsed = 0.0;
+    auto& bm = benchmarks()[i];
+    if (bm.name != "-") { // skip separators
+      if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
+        continue;
+      }
+      elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline);
+    }
+    results.push_back({bm.file, bm.name, elapsed});
    }
  
    // PLEASE MAKE NOISE. MEASUREMENTS DONE.