#include <atomic>
#include <thread>
+#include <folly/Likely.h>
#include <folly/detail/Futex.h>
#include <folly/detail/MemoryIdler.h>
#include <folly/portability/Asm.h>
namespace folly {
-/// A Baton allows a thread to block once and be awoken. The single
-/// poster version (with SinglePoster == true) captures a single
-/// handoff, and during its lifecycle (from construction/reset to
-/// destruction/reset) a baton must either be post()ed and wait()ed
+/// A Baton allows a thread to block once and be awoken. Captures a
+/// single handoff, and during its lifecycle (from construction/reset
+/// to destruction/reset) a baton must either be post()ed and wait()ed
/// exactly once each, or not at all.
///
-/// The multi-poster version (SinglePoster == false) allows multiple
-/// concurrent handoff attempts, the first of which completes the
-/// handoff and the rest if any are idempotent.
-///
/// Baton includes no internal padding, and is only 4 bytes in size.
/// Any alignment or padding to avoid false sharing is up to the user.
///
-/// This is basically a stripped-down semaphore that supports (only a
-/// single call to sem_post, when SinglePoster == true) and a single
-/// call to sem_wait.
+/// This is basically a stripped-down semaphore that supports only a
+/// single call to sem_post and a single call to sem_wait.
///
/// The non-blocking version (Blocking == false) provides more speed
/// by using only load acquire and store release operations in the
/// catch race conditions ahead of time.
template <
template <typename> class Atom = std::atomic,
- bool SinglePoster = true, // single vs multiple posters
bool Blocking = true> // blocking vs spinning
struct Baton {
constexpr Baton() : state_(INIT) {}
/// Blocking versions
///
- if (SinglePoster) {
- /// Single poster version
- ///
- uint32_t before = state_.load(std::memory_order_acquire);
-
- assert(before == INIT || before == WAITING || before == TIMED_OUT);
+ uint32_t before = state_.load(std::memory_order_acquire);
- if (before == INIT &&
- state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
- return;
- }
+ assert(before == INIT || before == WAITING || before == TIMED_OUT);
- assert(before == WAITING || before == TIMED_OUT);
+ if (before == INIT &&
+ state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
+ return;
+ }
- if (before == TIMED_OUT) {
- return;
- }
+ assert(before == WAITING || before == TIMED_OUT);
- assert(before == WAITING);
- state_.store(LATE_DELIVERY, std::memory_order_release);
- state_.futexWake(1);
- } else {
- /// Multi-poster version
- ///
- while (true) {
- uint32_t before = state_.load(std::memory_order_acquire);
-
- if (before == INIT &&
- state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
- return;
- }
-
- if (before == TIMED_OUT) {
- return;
- }
-
- if (before == EARLY_DELIVERY || before == LATE_DELIVERY) {
- // The reason for not simply returning (without the following
- // atomic operation) is to avoid the following case:
- //
- // T1: T2: T3:
- // local1.post(); local2.post(); global.wait();
- // global.post(); global.post(); local1.try_wait() == true;
- // local2.try_wait() == false;
- //
- if (state_.fetch_add(0) != before) {
- continue;
- }
- return;
- }
-
- assert(before == WAITING);
- if (!state_.compare_exchange_weak(before, LATE_DELIVERY)) {
- continue;
- }
- state_.futexWake(1);
- return;
- }
+ if (before == TIMED_OUT) {
+ return;
}
+
+ assert(before == WAITING);
+ state_.store(LATE_DELIVERY, std::memory_order_release);
+ state_.futexWake(1);
}
/// Waits until post() has been called in the current Baton lifetime.
/// could be relaxed somewhat without any perf or size regressions,
/// but by making this condition very restrictive we can provide better
/// checking in debug builds.
- void wait() {
- if (spinWaitForEarlyDelivery()) {
- assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
+ FOLLY_ALWAYS_INLINE void wait() {
+ if (try_wait()) {
return;
}
- if (!Blocking) {
- while (!try_wait()) {
- std::this_thread::yield();
- }
- return;
- }
-
- // guess we have to block :(
- uint32_t expected = INIT;
- if (!state_.compare_exchange_strong(expected, WAITING)) {
- // CAS failed, last minute reprieve
- assert(expected == EARLY_DELIVERY);
- return;
- }
-
- while (true) {
- detail::MemoryIdler::futexWait(state_, WAITING);
-
- // state_ is the truth even if FUTEX_WAIT reported a matching
- // FUTEX_WAKE, since we aren't using type-stable storage and we
- // don't guarantee reuse. The scenario goes like this: thread
- // A's last touch of a Baton is a call to wake(), which stores
- // LATE_DELIVERY and gets an unlucky context switch before delivering
- // the corresponding futexWake. Thread B sees LATE_DELIVERY
- // without consuming a futex event, because it calls futexWait
- // with an expected value of WAITING and hence doesn't go to sleep.
- // B returns, so the Baton's memory is reused and becomes another
- // Baton (or a reuse of this one). B calls futexWait on the new
- // Baton lifetime, then A wakes up and delivers a spurious futexWake
- // to the same memory location. B's futexWait will then report a
- // consumed wake event even though state_ is still WAITING.
- //
- // It would be possible to add an extra state_ dance to communicate
- // that the futexWake has been sent so that we can be sure to consume
- // it before returning, but that would be a perf and complexity hit.
- uint32_t s = state_.load(std::memory_order_acquire);
- assert(s == WAITING || s == LATE_DELIVERY);
-
- if (s == LATE_DELIVERY) {
- return;
- }
- // retry
- }
+ waitSlow();
}
/// Similar to wait, but doesn't block the thread if it hasn't been posted.
/// call wait, try_wait or timed_wait on the same baton without resetting
///
/// @return true if baton has been posted, false othewise
- bool try_wait() const {
+ FOLLY_ALWAYS_INLINE bool try_wait() const {
auto s = state_.load(std::memory_order_acquire);
assert(s == INIT || s == EARLY_DELIVERY);
- return s == EARLY_DELIVERY;
+ return LIKELY(s == EARLY_DELIVERY);
}
/// Similar to wait, but with a timeout. The thread is unblocked if the
/// @return true if the baton was posted to before timeout,
/// false otherwise
template <typename Rep, typename Period>
- bool try_wait_for(const std::chrono::duration<Rep, Period>& timeout) {
+ FOLLY_ALWAYS_INLINE bool try_wait_for(
+ const std::chrono::duration<Rep, Period>& timeout) {
static_assert(
Blocking, "Non-blocking Baton does not support try_wait_for.");
+ if (try_wait()) {
+ return true;
+ }
+
auto deadline = std::chrono::steady_clock::now() + timeout;
- return try_wait_until(deadline);
+ return tryWaitUntilSlow(deadline);
}
/// Similar to wait, but with a deadline. The thread is unblocked if the
/// @return true if the baton was posted to before deadline,
/// false otherwise
template <typename Clock, typename Duration>
- bool try_wait_until(
+ FOLLY_ALWAYS_INLINE bool try_wait_until(
const std::chrono::time_point<Clock, Duration>& deadline) {
static_assert(
Blocking, "Non-blocking Baton does not support try_wait_until.");
- if (spinWaitForEarlyDelivery()) {
- assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
- return true;
- }
-
- // guess we have to block :(
- uint32_t expected = INIT;
- if (!state_.compare_exchange_strong(expected, WAITING)) {
- // CAS failed, last minute reprieve
- assert(expected == EARLY_DELIVERY);
+ if (try_wait()) {
return true;
}
- while (true) {
- auto rv = state_.futexWaitUntil(WAITING, deadline);
- if (rv == folly::detail::FutexResult::TIMEDOUT) {
- state_.store(TIMED_OUT, std::memory_order_release);
- return false;
- }
-
- uint32_t s = state_.load(std::memory_order_acquire);
- assert(s == WAITING || s == LATE_DELIVERY);
- if (s == LATE_DELIVERY) {
- return true;
- }
- }
+ return tryWaitUntilSlow(deadline);
}
/// Alias to try_wait_for. Deprecated.
template <typename Rep, typename Period>
- bool timed_wait(const std::chrono::duration<Rep, Period>& timeout) {
+ FOLLY_ALWAYS_INLINE bool timed_wait(
+ const std::chrono::duration<Rep, Period>& timeout) {
return try_wait_for(timeout);
}
/// Alias to try_wait_until. Deprecated.
template <typename Clock, typename Duration>
- bool timed_wait(const std::chrono::time_point<Clock, Duration>& deadline) {
+ FOLLY_ALWAYS_INLINE bool timed_wait(
+ const std::chrono::time_point<Clock, Duration>& deadline) {
return try_wait_until(deadline);
}
EARLY_DELIVERY = 1,
WAITING = 2,
LATE_DELIVERY = 3,
- TIMED_OUT = 4
+ TIMED_OUT = 4,
};
enum {
// false otherwise. If the function returns true then
// state_ is guaranteed to be EARLY_DELIVERY
bool spinWaitForEarlyDelivery() {
-
- static_assert(PreBlockAttempts > 0,
+ static_assert(
+ PreBlockAttempts > 0,
"isn't this assert clearer than an uninitialized variable warning?");
for (int i = 0; i < PreBlockAttempts; ++i) {
if (try_wait()) {
- // hooray!
return true;
}
+
// The pause instruction is the polite way to spin, but it doesn't
// actually affect correctness to omit it if we don't have it.
// Pausing donates the full capabilities of the current core to
return false;
}
+ FOLLY_NOINLINE void waitSlow() {
+ if (spinWaitForEarlyDelivery()) {
+ assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
+ return;
+ }
+
+ if (!Blocking) {
+ while (!try_wait()) {
+ std::this_thread::yield();
+ }
+ return;
+ }
+
+ // guess we have to block :(
+ uint32_t expected = INIT;
+ if (!state_.compare_exchange_strong(expected, WAITING)) {
+ // CAS failed, last minute reprieve
+ assert(expected == EARLY_DELIVERY);
+ return;
+ }
+
+ while (true) {
+ detail::MemoryIdler::futexWait(state_, WAITING);
+
+ // state_ is the truth even if FUTEX_WAIT reported a matching
+ // FUTEX_WAKE, since we aren't using type-stable storage and we
+ // don't guarantee reuse. The scenario goes like this: thread
+ // A's last touch of a Baton is a call to wake(), which stores
+ // LATE_DELIVERY and gets an unlucky context switch before delivering
+ // the corresponding futexWake. Thread B sees LATE_DELIVERY
+ // without consuming a futex event, because it calls futexWait
+ // with an expected value of WAITING and hence doesn't go to sleep.
+ // B returns, so the Baton's memory is reused and becomes another
+ // Baton (or a reuse of this one). B calls futexWait on the new
+ // Baton lifetime, then A wakes up and delivers a spurious futexWake
+ // to the same memory location. B's futexWait will then report a
+ // consumed wake event even though state_ is still WAITING.
+ //
+ // It would be possible to add an extra state_ dance to communicate
+ // that the futexWake has been sent so that we can be sure to consume
+ // it before returning, but that would be a perf and complexity hit.
+ uint32_t s = state_.load(std::memory_order_acquire);
+ assert(s == WAITING || s == LATE_DELIVERY);
+
+ if (s == LATE_DELIVERY) {
+ return;
+ }
+ // retry
+ }
+ }
+
+ template <typename Clock, typename Duration>
+ FOLLY_NOINLINE bool tryWaitUntilSlow(
+ const std::chrono::time_point<Clock, Duration>& deadline) {
+ if (spinWaitForEarlyDelivery()) {
+ assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
+ return true;
+ }
+
+ // guess we have to block :(
+ uint32_t expected = INIT;
+ if (!state_.compare_exchange_strong(expected, WAITING)) {
+ // CAS failed, last minute reprieve
+ assert(expected == EARLY_DELIVERY);
+ return true;
+ }
+
+ while (true) {
+ auto rv = state_.futexWaitUntil(WAITING, deadline);
+ if (rv == folly::detail::FutexResult::TIMEDOUT) {
+ state_.store(TIMED_OUT, std::memory_order_release);
+ return false;
+ }
+
+ uint32_t s = state_.load(std::memory_order_acquire);
+ assert(s == WAITING || s == LATE_DELIVERY);
+ if (s == LATE_DELIVERY) {
+ return true;
+ }
+ }
+ }
+
detail::Futex<Atom> state_;
};