2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/Benchmark.h>
18 #include <folly/Baton.h>
19 #include <folly/futures/Future.h>
20 #include <folly/futures/InlineExecutor.h>
21 #include <folly/futures/Promise.h>
22 #include <folly/portability/GFlags.h>
23 #include <folly/portability/Semaphore.h>
27 using namespace folly;
// Chains `n` then() continuations onto a future created from the value 42.
// Every continuation is incr<int>, which is defined outside this excerpt
// (presumably it adds one to the value -- TODO confirm).
36 void someThens(size_t n) {
37 auto f = makeFuture<int>(42);
38 for (size_t i = 0; i < n; i++) {
// Reassign so that the next iteration extends the chain built so far.
39 f = f.then(incr<int>);
// Baseline benchmark for the BENCHMARK_RELATIVE entries that follow
// (promiseAndFuture, withThen, twoThens, fourThens, hundredThens).
// NOTE(review): the body is not visible in this excerpt; judging by the name
// it builds an already-completed future -- confirm.
45 BENCHMARK(constantFuture) {
// Relative benchmark that obtains a Future<int> from the promise `p`.
// NOTE(review): the declaration of `p` and whatever is done with `f`
// afterwards are on lines not visible in this excerpt.
49 BENCHMARK_RELATIVE(promiseAndFuture) {
51 Future<int> f = p.getFuture();
// Relative benchmark that obtains a future from the promise `p` and attaches
// a single incr<int> continuation with then() before storing the result.
// NOTE(review): the declaration of `p` and the rest of the body are on lines
// not visible in this excerpt.
56 BENCHMARK_RELATIVE(withThen) {
58 Future<int> f = p.getFuture().then(incr<int>);
// NOTE(review): body not visible in this excerpt; presumably someThens(2).
70 // Look for a relative score of >= 50%.
71 BENCHMARK_RELATIVE(twoThens) {
// NOTE(review): body not visible in this excerpt; presumably someThens(4).
75 // Look for a relative score of >= 25%.
76 BENCHMARK_RELATIVE(fourThens) {
// NOTE(review): body not visible in this excerpt; presumably someThens(100).
80 // Look for a relative score of >= 1%.
81 BENCHMARK_RELATIVE(hundredThens) {
85 // Lock contention. Although in practice fulfillment tends to be temporally
86 // separate from then() calls, the two will sometimes be concurrent, so the
87 // higher this number is, the better.
// Baseline for the lock-contention comparison. Creates 10000 promises,
// collects one future per promise, has a consumer thread attach an incr<int>
// continuation to every future, and has a producer thread fulfill every
// promise with 42.
// NOTE(review): the Baton waits/posts on b1 and b2, any benchmark suspension,
// and the thread joins are on lines not visible in this excerpt, so the exact
// ordering between consumer and producer must be confirmed in the full file.
90 BENCHMARK(no_contention) {
91 std::vector<Promise<int>> promises(10000);
92 std::vector<Future<int>> futures;
93 std::thread producer, consumer;
96 folly::Baton<> b1, b2;
// One future per promise, in the same order as `promises`.
97 for (auto& p : promises)
98 futures.push_back(p.getFuture());
// Consumer: registers a continuation on each future.
100 consumer = std::thread([&]{
102 for (auto& f : futures) f.then(incr<int>);
// Producer: fulfills each promise with the constant 42.
106 producer = std::thread([&]{
108 for (auto& p : promises) p.setValue(42);
115 // The only thing we are measuring is how long fulfill + callbacks take
// Contended counterpart of no_contention, reported relative to it. Uses the
// same 10000 promises/futures and the same consumer/producer thread pair, and
// additionally initialises the semaphore `sem` with pshared == 0 and an
// initial value of 0.
// NOTE(review): the declaration of `sem` and the bodies of both loops are on
// lines not visible in this excerpt; presumably the semaphore paces the two
// threads so that then() and setValue() overlap -- confirm in the full file.
119 BENCHMARK_RELATIVE(contention) {
120 std::vector<Promise<int>> promises(10000);
121 std::vector<Future<int>> futures;
122 std::thread producer, consumer;
124 sem_init(&sem, 0, 0);
127 folly::Baton<> b1, b2;
// One future per promise, in the same order as `promises`.
128 for (auto& p : promises)
129 futures.push_back(p.getFuture());
// Consumer thread: walks the futures.
131 consumer = std::thread([&]{
133 for (auto& f : futures) {
// Producer thread: walks the promises.
139 producer = std::thread([&]{
141 for (auto& p : promises) {
151 // The astute reader will notice that we're not *precisely* comparing apples
152 // to apples here. Well, maybe it's like comparing Granny Smith to
153 // Braeburn or something. In the serial version, we waited for the futures
154 // to be all set up, but here we are probably still doing that work
155 // (although in parallel). But even though there is more work (on the order
156 // of 2x), it is being done by two threads. Hopefully most of the difference
157 // we see is due to lock contention and not false parallelism.
159 // Be warned that if the box is under heavy load, this will greatly skew
160 // these results (scheduling overhead will begin to dwarf lock contention).
161 // I'm not sure but I'd guess in Windtunnel this will mean large variance,
162 // because I expect they load the boxes as much as they can?
// Separator between the contention benchmarks above and the exception
// benchmarks below.
167 BENCHMARK_DRAW_LINE();
169 // The old way. Throw an exception, and rethrow to access it upstream.
// The first continuation throws std::runtime_error("oh no"); the second
// receives the outcome as a Try<Unit> and handles it in a catch clause for
// std::runtime_error.
// NOTE(review): the head of the future chain and the statement inside the try
// block are on lines not visible in this excerpt.
170 void throwAndCatchImpl() {
172 .then([](Try<Unit>&&){ throw std::runtime_error("oh no"); })
173 .then([](Try<Unit>&& t) {
176 } catch(const std::runtime_error& e) {
184 // Not much better. Throw an exception, and access it via the wrapper upstream.
185 // Actually a little worse due to wrapper overhead. then() won't know that the
186 // exception is a runtime_error, so will have to store it as an exception_ptr
187 // anyway. withException will therefore have to rethrow. Note that if we threw
188 // std::exception instead, we would see some wins, as that's the type then()
189 // will try to wrap, so no exception_ptrs/rethrows are necessary.
// The first continuation throws std::runtime_error("oh no"); the second
// queries the Try with withException<std::runtime_error>() and keeps that
// call's result in `caught`.
// NOTE(review): the head of the chain and the handler body are on lines not
// visible in this excerpt.
190 void throwAndCatchWrappedImpl() {
192 .then([](Try<Unit>&&) { throw std::runtime_error("oh no"); })
193 .then([](Try<Unit>&& t) {
194 auto caught = t.withException<std::runtime_error>(
195 [](const std::runtime_error& /* e */) {
202 // Better. Wrap an exception, and rethrow to access it upstream.
// Instead of throwing, the first continuation returns a future built directly
// from std::runtime_error("oh no"); the second continuation still handles it
// through a catch clause for std::runtime_error.
// NOTE(review): the head of the chain and the statement inside the try block
// are on lines not visible in this excerpt.
203 void throwWrappedAndCatchImpl() {
205 .then([](Try<Unit>&&){
206 return makeFuture<Unit>(std::runtime_error("oh no"));
208 .then([](Try<Unit>&& t) {
211 } catch(const std::runtime_error& e) {
219 // The new way. Wrap an exception, and access it via the wrapper upstream
// The first continuation returns a future built directly from
// std::runtime_error("oh no"); the second queries the Try with
// withException<std::runtime_error>() and keeps that call's result in `caught`.
// NOTE(review): the head of the chain and the handler body are on lines not
// visible in this excerpt.
220 void throwWrappedAndCatchWrappedImpl() {
222 .then([](Try<Unit>&&) {
223 return makeFuture<Unit>(std::runtime_error("oh no"));
225 .then([](Try<Unit>&& t) {
226 auto caught = t.withException<std::runtime_error>(
227 [](const std::runtime_error& /* e */) {
234 // Simulate heavy contention on func
// Starts N worker threads that all block on a pthread barrier and then each
// run a loop of `iters` (1000) iterations. The barrier is created for N+1
// participants: the N workers plus one more waiter, presumably the calling
// thread at the second pthread_barrier_wait below -- TODO confirm.
// A BenchmarkSuspender is created first, so timing starts out suspended.
// NOTE(review): the definition of N, the loop body (presumably a call to
// `func`), the joins, and any dismiss/rehire of the suspender are on lines not
// visible in this excerpt.
//
// @param func  Function whose concurrent execution is being simulated.
235 void contend(void(*func)()) {
236 folly::BenchmarkSuspender s;
238 const int iters = 1000;
239 pthread_barrier_t barrier;
240 pthread_barrier_init(&barrier, nullptr, N+1);
241 std::vector<std::thread> threads;
242 for (int i = 0; i < N; i++) {
243 threads.push_back(std::thread([&](){
// Hold every worker here until all barrier participants have arrived.
244 pthread_barrier_wait(&barrier);
245 for (int j = 0; j < iters; j++) {
250 pthread_barrier_wait(&barrier);
252 for (auto& t : threads) {
256 pthread_barrier_destroy(&barrier);
// Uncontended exception-handling benchmarks. throwAndCatch is the baseline
// and the three BENCHMARK_RELATIVE entries each call the *Impl function of
// the same name.
// NOTE(review): the body of throwAndCatch is not visible in this excerpt;
// presumably it calls throwAndCatchImpl() -- confirm.
259 BENCHMARK(throwAndCatch) {
263 BENCHMARK_RELATIVE(throwAndCatchWrapped) {
264 throwAndCatchWrappedImpl();
267 BENCHMARK_RELATIVE(throwWrappedAndCatch) {
268 throwWrappedAndCatchImpl();
271 BENCHMARK_RELATIVE(throwWrappedAndCatchWrapped) {
272 throwWrappedAndCatchWrappedImpl();
// Separator before the contended variants.
275 BENCHMARK_DRAW_LINE();
// Contended exception-handling benchmarks: each entry hands the matching
// *Impl function to contend(). throwAndCatchContended is the baseline for the
// three BENCHMARK_RELATIVE entries.
277 BENCHMARK(throwAndCatchContended) {
278 contend(throwAndCatchImpl);
281 BENCHMARK_RELATIVE(throwAndCatchWrappedContended) {
282 contend(throwAndCatchWrappedImpl);
285 BENCHMARK_RELATIVE(throwWrappedAndCatchContended) {
286 contend(throwWrappedAndCatchImpl);
289 BENCHMARK_RELATIVE(throwWrappedAndCatchWrappedContended) {
290 contend(throwWrappedAndCatchWrappedImpl);
// Separator before the lvalue/rvalue get() benchmarks.
293 BENCHMARK_DRAW_LINE();
// Takes the message by value and moves it into the member, so an rvalue
// argument is never copied and an lvalue argument is copied exactly once.
297 explicit Bulky(std::string message) : message_(std::move(message)) {}
// Lvalue-qualified accessor; returns the message by value.
// NOTE(review): its body is on a line not visible in this excerpt.
298 std::string message() & {
// Rvalue-qualified accessor; exposes message_ as an rvalue reference so that
// a caller holding an expiring object can move the string out.
301 std::string&& message() && {
302 return std::move(message_);
306 std::string message_;
// 1024 ints of extra payload; nothing in the visible code reads or writes it.
307 std::array<int, 1024> ints_;
309 } // anonymous namespace
// Baseline: calls get() on an lvalue Future<Bulky> and then message() on the
// result, copying the string into `message`. The future is built while the
// BenchmarkSuspender is active; only the callable passed to dismissing() is
// meant to be timed.
311 BENCHMARK(lvalue_get) {
312 BenchmarkSuspender suspender;
313 Optional<Future<Bulky>> future;
314 future = makeFuture(Bulky("Hello"));
315 suspender.dismissing([&] {
316 std::string message = future.value().get().message();
// Keep the compiler from discarding the result.
317 doNotOptimizeAway(message);
// Relative to lvalue_get: identical setup, but get() is invoked on the future
// after casting it to an rvalue with std::move. The future is built while the
// BenchmarkSuspender is active; only the callable passed to dismissing() is
// meant to be timed.
321 BENCHMARK_RELATIVE(rvalue_get) {
322 BenchmarkSuspender suspender;
323 Optional<Future<Bulky>> future;
324 future = makeFuture(Bulky("Hello"));
325 suspender.dismissing([&] {
326 std::string message = std::move(future.value()).get().message();
// Keep the compiler from discarding the result.
327 doNotOptimizeAway(message);
// NOTE(review): fragment of a future-generating helper -- presumably fGen<T>,
// which fsGen calls. Its signature, the declaration of `p`, and the
// continuations around the two return statements are on lines not visible in
// this excerpt. What is visible: a chain started from p.getFuture(), and two
// places that re-wrap a moved `t` in a new future.
336 auto f = p.getFuture()
341 return makeFuture(std::move(t));
348 return makeFuture(std::move(t));
// Builds a vector of 10 futures, each produced by a separate fGen<T>() call.
// NOTE(review): the template header and the return statement are on lines not
// visible in this excerpt.
355 std::vector<Future<T>> fsGen() {
356 std::vector<Future<T>> fs;
357 for (auto i = 0; i < 10; i++) {
358 fs.push_back(fGen<T>());
// Runs several combinators over fresh batches from fsGen<T>(): collectAll,
// collectAny, and two futures::map calls whose callbacks take `const T&`.
// The second map callback returns a future holding a copy of its argument.
// NOTE(review): the template header, the body of the first map callback, and
// any further combinator calls are on lines not visible in this excerpt.
364 void complexBenchmark() {
366 collectAll(fsGen<T>());
367 collectAny(fsGen<T>());
368 futures::map(fsGen<T>(), [] (const T& t) {
371 futures::map(fsGen<T>(), [] (const T& t) {
372 return makeFuture(T(t));
// Separator before the complexBenchmark family.
376 BENCHMARK_DRAW_LINE();
// complexBenchmark<T> instantiated for Unit (the baseline) and for Blob<N>
// with N from 4 up to 4096, each reported relative to complexUnit.
// NOTE(review): Blob is declared on lines not visible in this excerpt;
// presumably N sets the payload size -- confirm.
383 BENCHMARK(complexUnit) {
384 complexBenchmark<Unit>();
387 BENCHMARK_RELATIVE(complexBlob4) {
388 complexBenchmark<Blob<4>>();
391 BENCHMARK_RELATIVE(complexBlob8) {
392 complexBenchmark<Blob<8>>();
395 BENCHMARK_RELATIVE(complexBlob64) {
396 complexBenchmark<Blob<64>>();
399 BENCHMARK_RELATIVE(complexBlob128) {
400 complexBenchmark<Blob<128>>();
403 BENCHMARK_RELATIVE(complexBlob256) {
404 complexBenchmark<Blob<256>>();
407 BENCHMARK_RELATIVE(complexBlob512) {
408 complexBenchmark<Blob<512>>();
411 BENCHMARK_RELATIVE(complexBlob1024) {
412 complexBenchmark<Blob<1024>>();
415 BENCHMARK_RELATIVE(complexBlob2048) {
416 complexBenchmark<Blob<2048>>();
419 BENCHMARK_RELATIVE(complexBlob4096) {
420 complexBenchmark<Blob<4096>>();
// Entry point: parses the gflags command line (the `true` argument asks
// gflags to strip the flags it recognised from argv), then runs the
// benchmarks via folly::runBenchmarks().
// NOTE(review): the return statement is on a line not visible in this excerpt.
423 int main(int argc, char** argv) {
424 gflags::ParseCommandLineFlags(&argc, &argv, true);
425 folly::runBenchmarks();