From 2ac160c2a7e6fcf12a0db18093fa39e8f01ffc48 Mon Sep 17 00:00:00 2001 From: Yedidya Feldblum Date: Tue, 9 Jan 2018 20:11:18 -0800 Subject: [PATCH] Baton support for wait-options Summary: [Folly] Baton support for wait-options Reviewed By: nbronson Differential Revision: D6591168 fbshipit-source-id: beca8422ac0daa572fb43c371923a86f199999f9 --- folly/Makefile.am | 1 + folly/concurrency/UnboundedQueue.h | 5 +- folly/synchronization/Baton.h | 72 +++++++++++++++-------------- folly/synchronization/WaitOptions.h | 4 +- 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/folly/Makefile.am b/folly/Makefile.am index c1327fc4..60a66c8e 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -439,6 +439,7 @@ nobase_follyinclude_HEADERS = \ synchronization/ParkingLot.h \ synchronization/RWSpinLock.h \ synchronization/SaturatingSemaphore.h \ + synchronization/WaitOptions.h \ synchronization/detail/AtomicUtils.h \ synchronization/detail/Sleeper.h \ system/MemoryMapping.h \ diff --git a/folly/concurrency/UnboundedQueue.h b/folly/concurrency/UnboundedQueue.h index 1aa2f416..22e94e07 100644 --- a/folly/concurrency/UnboundedQueue.h +++ b/folly/concurrency/UnboundedQueue.h @@ -623,7 +623,10 @@ class UnboundedQueue { template FOLLY_ALWAYS_INLINE bool tryWaitUntil( const std::chrono::time_point& deadline) noexcept { - return flag_.try_wait_until(deadline); + // wait-options from benchmarks on contended queues: + auto const opt = + flag_.wait_options().spin_max(std::chrono::microseconds(10)); + return flag_.try_wait_until(deadline, opt); } private: diff --git a/folly/synchronization/Baton.h b/folly/synchronization/Baton.h index b3b1e09e..f483bd5d 100644 --- a/folly/synchronization/Baton.h +++ b/folly/synchronization/Baton.h @@ -26,6 +26,7 @@ #include #include #include +#include namespace folly { @@ -51,7 +52,12 @@ namespace folly { /// lifecycle we can also add a bunch of assertions that can help to /// catch race conditions ahead of time. template class Atom = std::atomic> -struct Baton { +class Baton { + public: + FOLLY_ALWAYS_INLINE static WaitOptions wait_options() { + return {}; + } + constexpr Baton() noexcept : state_(INIT) {} Baton(Baton const&) = delete; @@ -160,12 +166,13 @@ struct Baton { /// could be relaxed somewhat without any perf or size regressions, /// but by making this condition very restrictive we can provide better /// checking in debug builds. - FOLLY_ALWAYS_INLINE void wait() noexcept { + FOLLY_ALWAYS_INLINE + void wait(const WaitOptions& opt = wait_options()) noexcept { if (try_wait()) { return; } - waitSlow(); + waitSlow(opt); } /// Similar to wait, but doesn't block the thread if it hasn't been posted. @@ -196,13 +203,14 @@ struct Baton { /// false otherwise template FOLLY_ALWAYS_INLINE bool try_wait_for( - const std::chrono::duration& timeout) noexcept { + const std::chrono::duration& timeout, + const WaitOptions& opt = wait_options()) noexcept { if (try_wait()) { return true; } auto deadline = std::chrono::steady_clock::now() + timeout; - return tryWaitUntilSlow(deadline); + return tryWaitUntilSlow(deadline, opt); } /// Similar to wait, but with a deadline. The thread is unblocked if the @@ -218,12 +226,13 @@ struct Baton { /// false otherwise template FOLLY_ALWAYS_INLINE bool try_wait_until( - const std::chrono::time_point& deadline) noexcept { + const std::chrono::time_point& deadline, + const WaitOptions& opt = wait_options()) noexcept { if (try_wait()) { return true; } - return tryWaitUntilSlow(deadline); + return tryWaitUntilSlow(deadline, opt); } /// Alias to try_wait_for. Deprecated. @@ -249,52 +258,44 @@ struct Baton { TIMED_OUT = 4, }; - enum { - // Must be positive. If multiple threads are actively using a - // higher-level data structure that uses batons internally, it is - // likely that the post() and wait() calls happen almost at the same - // time. In this state, we lose big 50% of the time if the wait goes - // to sleep immediately. On circa-2013 devbox hardware it costs about - // 7 usec to FUTEX_WAIT and then be awoken (half the t/iter as the - // posix_sem_pingpong test in BatonTests). We can improve our chances - // of EARLY_DELIVERY by spinning for a bit, although we have to balance - // this against the loss if we end up sleeping any way. Spins on this - // hw take about 7 nanos (all but 0.5 nanos is the pause instruction). - // We give ourself 300 spins, which is about 2 usec of waiting. As a - // partial consolation, since we are using the pause instruction we - // are giving a speed boost to the colocated hyperthread. - PreBlockAttempts = 300, - }; - - // Spin for "some time" (see discussion on PreBlockAttempts) waiting - // for a post. + // Spin for "some time" // // @return true if we received an early delivery during the wait, // false otherwise. If the function returns true then // state_ is guaranteed to be EARLY_DELIVERY template bool spinWaitForEarlyDelivery( - const std::chrono::time_point& deadline) noexcept { - for (int i = 0; i < PreBlockAttempts; ++i) { + const std::chrono::time_point& deadline, + const WaitOptions& opt) noexcept { + auto tbegin = Clock::now(); + while (true) { if (try_wait()) { return true; } - if (Clock::now() >= deadline) { + + auto const tnow = Clock::now(); + if (tnow >= deadline) { + return false; + } + + // Backward time discontinuity in Clock? revise pre_block starting point + tbegin = std::min(tbegin, tnow); + auto const dur = std::chrono::duration_cast(tnow - tbegin); + if (dur >= opt.spin_max()) { return false; } + // The pause instruction is the polite way to spin, but it doesn't // actually affect correctness to omit it if we don't have it. // Pausing donates the full capabilities of the current core to // its other hyperthreads for a dozen cycles or so asm_volatile_pause(); } - - return false; } - FOLLY_NOINLINE void waitSlow() noexcept { + FOLLY_NOINLINE void waitSlow(const WaitOptions& opt) noexcept { auto const deadline = std::chrono::steady_clock::time_point::max(); - if (spinWaitForEarlyDelivery(deadline)) { + if (spinWaitForEarlyDelivery(deadline, opt)) { assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY); return; } @@ -346,8 +347,9 @@ struct Baton { template FOLLY_NOINLINE bool tryWaitUntilSlow( - const std::chrono::time_point& deadline) noexcept { - if (spinWaitForEarlyDelivery(deadline)) { + const std::chrono::time_point& deadline, + const WaitOptions& opt) noexcept { + if (spinWaitForEarlyDelivery(deadline, opt)) { assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY); return true; } diff --git a/folly/synchronization/WaitOptions.h b/folly/synchronization/WaitOptions.h index a729b4e2..cf26f420 100644 --- a/folly/synchronization/WaitOptions.h +++ b/folly/synchronization/WaitOptions.h @@ -44,7 +44,7 @@ class WaitOptions { /// against the extra cpu utilization, latency reduction, power consumption, /// and priority inversion effect if we end up blocking anyway. /// - /// We use a default maximum of 10 usec of spinning. As partial consolation, + /// We use a default maximum of 2 usec of spinning. As partial consolation, /// since spinning as implemented in folly uses the pause instruction where /// available, we give a small speed boost to the colocated hyperthread. /// @@ -52,7 +52,7 @@ class WaitOptions { /// then be awoken. Spins on this hw take about 7 nsec, where all but 0.5 /// nsec is the pause instruction. static constexpr std::chrono::nanoseconds spin_max = - std::chrono::microseconds(10); + std::chrono::microseconds(2); }; std::chrono::nanoseconds spin_max() const { -- 2.34.1