From bca34971c398bb8f5f258dba57206068bfd29937 Mon Sep 17 00:00:00 2001 From: Yedidya Feldblum Date: Sat, 9 Dec 2017 09:50:25 -0800 Subject: [PATCH] Split Baton wait methods into fast and slow paths Summary: [Folly] Split `Baton` wait methods into fast and slow paths. Force-inline the fast paths, and force-outline the slow paths. Reviewed By: magedm Differential Revision: D6501992 fbshipit-source-id: 611e26b3cfeef01eb8d3a3500ae3ebc26bee6e86 --- folly/synchronization/Baton.h | 186 +++++++++++++++++++--------------- 1 file changed, 106 insertions(+), 80 deletions(-) diff --git a/folly/synchronization/Baton.h b/folly/synchronization/Baton.h index 8a2c0c2c..ef871252 100644 --- a/folly/synchronization/Baton.h +++ b/folly/synchronization/Baton.h @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -199,55 +200,12 @@ struct Baton { /// could be relaxed somewhat without any perf or size regressions, /// but by making this condition very restrictive we can provide better /// checking in debug builds. - void wait() { - if (spinWaitForEarlyDelivery()) { - assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY); - return; - } - - if (!Blocking) { - while (!try_wait()) { - std::this_thread::yield(); - } - return; - } - - // guess we have to block :( - uint32_t expected = INIT; - if (!state_.compare_exchange_strong(expected, WAITING)) { - // CAS failed, last minute reprieve - assert(expected == EARLY_DELIVERY); + FOLLY_ALWAYS_INLINE void wait() { + if (try_wait()) { return; } - while (true) { - detail::MemoryIdler::futexWait(state_, WAITING); - - // state_ is the truth even if FUTEX_WAIT reported a matching - // FUTEX_WAKE, since we aren't using type-stable storage and we - // don't guarantee reuse. The scenario goes like this: thread - // A's last touch of a Baton is a call to wake(), which stores - // LATE_DELIVERY and gets an unlucky context switch before delivering - // the corresponding futexWake. Thread B sees LATE_DELIVERY - // without consuming a futex event, because it calls futexWait - // with an expected value of WAITING and hence doesn't go to sleep. - // B returns, so the Baton's memory is reused and becomes another - // Baton (or a reuse of this one). B calls futexWait on the new - // Baton lifetime, then A wakes up and delivers a spurious futexWake - // to the same memory location. B's futexWait will then report a - // consumed wake event even though state_ is still WAITING. - // - // It would be possible to add an extra state_ dance to communicate - // that the futexWake has been sent so that we can be sure to consume - // it before returning, but that would be a perf and complexity hit. - uint32_t s = state_.load(std::memory_order_acquire); - assert(s == WAITING || s == LATE_DELIVERY); - - if (s == LATE_DELIVERY) { - return; - } - // retry - } + waitSlow(); } /// Similar to wait, but doesn't block the thread if it hasn't been posted. @@ -261,10 +219,10 @@ struct Baton { /// call wait, try_wait or timed_wait on the same baton without resetting /// /// @return true if baton has been posted, false othewise - bool try_wait() const { + FOLLY_ALWAYS_INLINE bool try_wait() const { auto s = state_.load(std::memory_order_acquire); assert(s == INIT || s == EARLY_DELIVERY); - return s == EARLY_DELIVERY; + return LIKELY(s == EARLY_DELIVERY); } /// Similar to wait, but with a timeout. The thread is unblocked if the @@ -279,12 +237,17 @@ struct Baton { /// @return true if the baton was posted to before timeout, /// false otherwise template - bool try_wait_for(const std::chrono::duration& timeout) { + FOLLY_ALWAYS_INLINE bool try_wait_for( + const std::chrono::duration& timeout) { static_assert( Blocking, "Non-blocking Baton does not support try_wait_for."); + if (try_wait()) { + return true; + } + auto deadline = std::chrono::steady_clock::now() + timeout; - return try_wait_until(deadline); + return tryWaitUntilSlow(deadline); } /// Similar to wait, but with a deadline. The thread is unblocked if the @@ -299,48 +262,29 @@ struct Baton { /// @return true if the baton was posted to before deadline, /// false otherwise template - bool try_wait_until( + FOLLY_ALWAYS_INLINE bool try_wait_until( const std::chrono::time_point& deadline) { static_assert( Blocking, "Non-blocking Baton does not support try_wait_until."); - if (spinWaitForEarlyDelivery()) { - assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY); - return true; - } - - // guess we have to block :( - uint32_t expected = INIT; - if (!state_.compare_exchange_strong(expected, WAITING)) { - // CAS failed, last minute reprieve - assert(expected == EARLY_DELIVERY); + if (try_wait()) { return true; } - while (true) { - auto rv = state_.futexWaitUntil(WAITING, deadline); - if (rv == folly::detail::FutexResult::TIMEDOUT) { - state_.store(TIMED_OUT, std::memory_order_release); - return false; - } - - uint32_t s = state_.load(std::memory_order_acquire); - assert(s == WAITING || s == LATE_DELIVERY); - if (s == LATE_DELIVERY) { - return true; - } - } + return tryWaitUntilSlow(deadline); } /// Alias to try_wait_for. Deprecated. template - bool timed_wait(const std::chrono::duration& timeout) { + FOLLY_ALWAYS_INLINE bool timed_wait( + const std::chrono::duration& timeout) { return try_wait_for(timeout); } /// Alias to try_wait_until. Deprecated. template - bool timed_wait(const std::chrono::time_point& deadline) { + FOLLY_ALWAYS_INLINE bool timed_wait( + const std::chrono::time_point& deadline) { return try_wait_until(deadline); } @@ -350,7 +294,7 @@ struct Baton { EARLY_DELIVERY = 1, WAITING = 2, LATE_DELIVERY = 3, - TIMED_OUT = 4 + TIMED_OUT = 4, }; enum { @@ -377,14 +321,14 @@ struct Baton { // false otherwise. If the function returns true then // state_ is guaranteed to be EARLY_DELIVERY bool spinWaitForEarlyDelivery() { - - static_assert(PreBlockAttempts > 0, + static_assert( + PreBlockAttempts > 0, "isn't this assert clearer than an uninitialized variable warning?"); for (int i = 0; i < PreBlockAttempts; ++i) { if (try_wait()) { - // hooray! return true; } + // The pause instruction is the polite way to spin, but it doesn't // actually affect correctness to omit it if we don't have it. // Pausing donates the full capabilities of the current core to @@ -395,6 +339,88 @@ struct Baton { return false; } + FOLLY_NOINLINE void waitSlow() { + if (spinWaitForEarlyDelivery()) { + assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY); + return; + } + + if (!Blocking) { + while (!try_wait()) { + std::this_thread::yield(); + } + return; + } + + // guess we have to block :( + uint32_t expected = INIT; + if (!state_.compare_exchange_strong(expected, WAITING)) { + // CAS failed, last minute reprieve + assert(expected == EARLY_DELIVERY); + return; + } + + while (true) { + detail::MemoryIdler::futexWait(state_, WAITING); + + // state_ is the truth even if FUTEX_WAIT reported a matching + // FUTEX_WAKE, since we aren't using type-stable storage and we + // don't guarantee reuse. The scenario goes like this: thread + // A's last touch of a Baton is a call to wake(), which stores + // LATE_DELIVERY and gets an unlucky context switch before delivering + // the corresponding futexWake. Thread B sees LATE_DELIVERY + // without consuming a futex event, because it calls futexWait + // with an expected value of WAITING and hence doesn't go to sleep. + // B returns, so the Baton's memory is reused and becomes another + // Baton (or a reuse of this one). B calls futexWait on the new + // Baton lifetime, then A wakes up and delivers a spurious futexWake + // to the same memory location. B's futexWait will then report a + // consumed wake event even though state_ is still WAITING. + // + // It would be possible to add an extra state_ dance to communicate + // that the futexWake has been sent so that we can be sure to consume + // it before returning, but that would be a perf and complexity hit. + uint32_t s = state_.load(std::memory_order_acquire); + assert(s == WAITING || s == LATE_DELIVERY); + + if (s == LATE_DELIVERY) { + return; + } + // retry + } + } + + template + FOLLY_NOINLINE bool tryWaitUntilSlow( + const std::chrono::time_point& deadline) { + if (spinWaitForEarlyDelivery()) { + assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY); + return true; + } + + // guess we have to block :( + uint32_t expected = INIT; + if (!state_.compare_exchange_strong(expected, WAITING)) { + // CAS failed, last minute reprieve + assert(expected == EARLY_DELIVERY); + return true; + } + + while (true) { + auto rv = state_.futexWaitUntil(WAITING, deadline); + if (rv == folly::detail::FutexResult::TIMEDOUT) { + state_.store(TIMED_OUT, std::memory_order_release); + return false; + } + + uint32_t s = state_.load(std::memory_order_acquire); + assert(s == WAITING || s == LATE_DELIVERY); + if (s == LATE_DELIVERY) { + return true; + } + } + } + detail::Futex state_; }; -- 2.34.1