/*
- * Copyright 2017 Facebook, Inc.
+ * Copyright 2014-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <atomic>
#include <thread>
+#include <folly/Likely.h>
#include <folly/detail/Futex.h>
#include <folly/detail/MemoryIdler.h>
#include <folly/portability/Asm.h>
namespace folly {
-/// A Baton allows a thread to block once and be awoken. The single
-/// poster version (with SinglePoster == true) captures a single
-/// handoff, and during its lifecycle (from construction/reset to
-/// destruction/reset) a baton must either be post()ed and wait()ed
+/// A Baton allows a thread to block once and be awoken. Captures a
+/// single handoff, and during its lifecycle (from construction/reset
+/// to destruction/reset) a baton must either be post()ed and wait()ed
/// exactly once each, or not at all.
///
-/// The multi-poster version (SinglePoster == false) allows multiple
-/// concurrent handoff attempts, the first of which completes the
-/// handoff and the rest if any are idempotent.
-///
/// Baton includes no internal padding, and is only 4 bytes in size.
/// Any alignment or padding to avoid false sharing is up to the user.
///
-/// This is basically a stripped-down semaphore that supports (only a
-/// single call to sem_post, when SinglePoster == true) and a single
-/// call to sem_wait.
+/// This is basically a stripped-down semaphore that supports only a
+/// single call to sem_post and a single call to sem_wait.
///
-/// The non-blocking version (Blocking == false) provides more speed
+/// The non-blocking version (MayBlock == false) provides more speed
/// by using only load acquire and store release operations in the
-/// critical path, at the cost of disallowing blocking and timing out.
+/// critical path, at the cost of disallowing blocking.
///
/// The current posix semaphore sem_t isn't too bad, but this provides
/// more a bit more speed, inlining, smaller size, a guarantee that
/// DeterministicSchedule. By having a much more restrictive
/// lifecycle we can also add a bunch of assertions that can help to
/// catch race conditions ahead of time.
-template <
- template <typename> class Atom = std::atomic,
- bool SinglePoster = true, // single vs multiple posters
- bool Blocking = true> // blocking vs spinning
+template <bool MayBlock = true, template <typename> class Atom = std::atomic>
struct Baton {
- constexpr Baton() : state_(INIT) {}
+ constexpr Baton() noexcept : state_(INIT) {}
Baton(Baton const&) = delete;
Baton& operator=(Baton const&) = delete;
/// It is an error to destroy a Baton on which a thread is currently
/// wait()ing. In practice this means that the waiter usually takes
/// responsibility for destroying the Baton.
- ~Baton() {
+ ~Baton() noexcept {
// The docblock for this function says that it can't be called when
// there is a concurrent waiter. We assume a strong version of this
// requirement in which the caller must _know_ that this is true, they
assert(state_.load(std::memory_order_relaxed) != WAITING);
}
+ FOLLY_ALWAYS_INLINE bool ready() const noexcept {
+ auto s = state_.load(std::memory_order_acquire);
+ assert(s == INIT || s == EARLY_DELIVERY);
+ return LIKELY(s == EARLY_DELIVERY);
+ }
+
/// Equivalent to destroying the Baton and creating a new one. It is
/// a bug to call this while there is a waiting thread, so in practice
/// the waiter will be the one that resets the baton.
- void reset() {
+ void reset() noexcept {
// See ~Baton for a discussion about why relaxed is okay here
assert(state_.load(std::memory_order_relaxed) != WAITING);
/// lifetime starts at construction or reset() and ends at
/// destruction or reset()) there can be at most one call to post(),
/// in the single poster version. Any thread may call post().
- void post() {
- if (!Blocking) {
- /// Non-blocking version
+ void post() noexcept {
+ if (!MayBlock) {
+ /// Spin-only version
///
assert([&] {
auto state = state_.load(std::memory_order_relaxed);
return;
}
- /// Blocking versions
+ /// May-block versions
///
- if (SinglePoster) {
- /// Single poster version
- ///
- uint32_t before = state_.load(std::memory_order_acquire);
-
- assert(before == INIT || before == WAITING || before == TIMED_OUT);
+ uint32_t before = state_.load(std::memory_order_acquire);
- if (before == INIT &&
- state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
- return;
- }
-
- assert(before == WAITING || before == TIMED_OUT);
-
- if (before == TIMED_OUT) {
- return;
- }
+ assert(before == INIT || before == WAITING || before == TIMED_OUT);
- assert(before == WAITING);
- state_.store(LATE_DELIVERY, std::memory_order_release);
- state_.futexWake(1);
- } else {
- /// Multi-poster version
- ///
- while (true) {
- uint32_t before = state_.load(std::memory_order_acquire);
-
- if (before == INIT &&
- state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
- return;
- }
-
- if (before == TIMED_OUT) {
- return;
- }
+ if (before == INIT &&
+ state_.compare_exchange_strong(
+ before,
+ EARLY_DELIVERY,
+ std::memory_order_release,
+ std::memory_order_relaxed)) {
+ return;
+ }
- if (before == EARLY_DELIVERY || before == LATE_DELIVERY) {
- // The reason for not simply returning (without the following
- // atomic operation) is to avoid the following case:
- //
- // T1: T2: T3:
- // local1.post(); local2.post(); global.wait();
- // global.post(); global.post(); local1.try_wait() == true;
- // local2.try_wait() == false;
- //
- if (state_.fetch_add(0) != before) {
- continue;
- }
- return;
- }
+ assert(before == WAITING || before == TIMED_OUT);
- assert(before == WAITING);
- if (!state_.compare_exchange_weak(before, LATE_DELIVERY)) {
- continue;
- }
- state_.futexWake(1);
- return;
- }
+ if (before == TIMED_OUT) {
+ return;
}
+
+ assert(before == WAITING);
+ state_.store(LATE_DELIVERY, std::memory_order_release);
+ state_.futexWake(1);
}
/// Waits until post() has been called in the current Baton lifetime.
/// could be relaxed somewhat without any perf or size regressions,
/// but by making this condition very restrictive we can provide better
/// checking in debug builds.
- void wait() {
- if (spinWaitForEarlyDelivery()) {
+ FOLLY_ALWAYS_INLINE void wait() noexcept {
+ if (try_wait()) {
+ return;
+ }
+
+ waitSlow();
+ }
+
+ /// Similar to wait, but doesn't block the thread if it hasn't been posted.
+ ///
+ /// try_wait has the following semantics:
+ /// - It is ok to call try_wait any number times on the same baton until
+ /// try_wait reports that the baton has been posted.
+ /// - It is ok to call timed_wait or wait on the same baton if try_wait
+ /// reports that baton hasn't been posted.
+ /// - If try_wait indicates that the baton has been posted, it is invalid to
+ /// call wait, try_wait or timed_wait on the same baton without resetting
+ ///
+ /// @return true if baton has been posted, false othewise
+ FOLLY_ALWAYS_INLINE bool try_wait() const noexcept {
+ return ready();
+ }
+
+ /// Similar to wait, but with a timeout. The thread is unblocked if the
+ /// timeout expires.
+ /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
+ /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
+ /// words, after try_wait_for the caller can't invoke
+ /// wait/try_wait/try_wait_for/try_wait_until
+ /// again on the same baton without resetting it.
+ ///
+ /// @param timeout Time until which the thread can block
+ /// @return true if the baton was posted to before timeout,
+ /// false otherwise
+ template <typename Rep, typename Period>
+ FOLLY_ALWAYS_INLINE bool try_wait_for(
+ const std::chrono::duration<Rep, Period>& timeout) noexcept {
+ if (try_wait()) {
+ return true;
+ }
+
+ auto deadline = std::chrono::steady_clock::now() + timeout;
+ return tryWaitUntilSlow(deadline);
+ }
+
+ /// Similar to wait, but with a deadline. The thread is unblocked if the
+ /// deadline expires.
+ /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
+ /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
+ /// words, after try_wait_until the caller can't invoke
+ /// wait/try_wait/try_wait_for/try_wait_until
+ /// again on the same baton without resetting it.
+ ///
+ /// @param deadline Time until which the thread can block
+ /// @return true if the baton was posted to before deadline,
+ /// false otherwise
+ template <typename Clock, typename Duration>
+ FOLLY_ALWAYS_INLINE bool try_wait_until(
+ const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
+ if (try_wait()) {
+ return true;
+ }
+
+ return tryWaitUntilSlow(deadline);
+ }
+
+ /// Alias to try_wait_for. Deprecated.
+ template <typename Rep, typename Period>
+ FOLLY_ALWAYS_INLINE bool timed_wait(
+ const std::chrono::duration<Rep, Period>& timeout) noexcept {
+ return try_wait_for(timeout);
+ }
+
+ /// Alias to try_wait_until. Deprecated.
+ template <typename Clock, typename Duration>
+ FOLLY_ALWAYS_INLINE bool timed_wait(
+ const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
+ return try_wait_until(deadline);
+ }
+
+ private:
+ enum State : uint32_t {
+ INIT = 0,
+ EARLY_DELIVERY = 1,
+ WAITING = 2,
+ LATE_DELIVERY = 3,
+ TIMED_OUT = 4,
+ };
+
+ enum {
+ // Must be positive. If multiple threads are actively using a
+ // higher-level data structure that uses batons internally, it is
+ // likely that the post() and wait() calls happen almost at the same
+ // time. In this state, we lose big 50% of the time if the wait goes
+ // to sleep immediately. On circa-2013 devbox hardware it costs about
+ // 7 usec to FUTEX_WAIT and then be awoken (half the t/iter as the
+ // posix_sem_pingpong test in BatonTests). We can improve our chances
+ // of EARLY_DELIVERY by spinning for a bit, although we have to balance
+ // this against the loss if we end up sleeping any way. Spins on this
+ // hw take about 7 nanos (all but 0.5 nanos is the pause instruction).
+ // We give ourself 300 spins, which is about 2 usec of waiting. As a
+ // partial consolation, since we are using the pause instruction we
+ // are giving a speed boost to the colocated hyperthread.
+ PreBlockAttempts = 300,
+ };
+
+ // Spin for "some time" (see discussion on PreBlockAttempts) waiting
+ // for a post.
+ //
+ // @return true if we received an early delivery during the wait,
+ // false otherwise. If the function returns true then
+ // state_ is guaranteed to be EARLY_DELIVERY
+ template <typename Clock, typename Duration>
+ bool spinWaitForEarlyDelivery(
+ const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
+ for (int i = 0; i < PreBlockAttempts; ++i) {
+ if (try_wait()) {
+ return true;
+ }
+ if (Clock::now() >= deadline) {
+ return false;
+ }
+ // The pause instruction is the polite way to spin, but it doesn't
+ // actually affect correctness to omit it if we don't have it.
+ // Pausing donates the full capabilities of the current core to
+ // its other hyperthreads for a dozen cycles or so
+ asm_volatile_pause();
+ }
+
+ return false;
+ }
+
+ FOLLY_NOINLINE void waitSlow() noexcept {
+ auto const deadline = std::chrono::steady_clock::time_point::max();
+ if (spinWaitForEarlyDelivery(deadline)) {
assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
return;
}
- if (!Blocking) {
+ if (!MayBlock) {
while (!try_wait()) {
std::this_thread::yield();
}
}
}
- /// Similar to wait, but with a timeout. The thread is unblocked if the
- /// timeout expires.
- /// Note: Only a single call to timed_wait/wait is allowed during a baton's
- /// life-cycle (from construction/reset to destruction/reset). In other
- /// words, after timed_wait the caller can't invoke wait/timed_wait/try_wait
- /// again on the same baton without resetting it.
- ///
- /// @param deadline Time until which the thread can block
- /// @return true if the baton was posted to before timeout,
- /// false otherwise
- template <typename Clock, typename Duration = typename Clock::duration>
- bool timed_wait(const std::chrono::time_point<Clock,Duration>& deadline) {
- static_assert(Blocking, "Non-blocking Baton does not support timed wait.");
-
- if (spinWaitForEarlyDelivery()) {
+ template <typename Clock, typename Duration>
+ FOLLY_NOINLINE bool tryWaitUntilSlow(
+ const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
+ if (spinWaitForEarlyDelivery(deadline)) {
assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
return true;
}
+ if (!MayBlock) {
+ while (true) {
+ if (try_wait()) {
+ return true;
+ }
+ if (Clock::now() >= deadline) {
+ return false;
+ }
+ std::this_thread::yield();
+ }
+ }
+
// guess we have to block :(
uint32_t expected = INIT;
if (!state_.compare_exchange_strong(expected, WAITING)) {
}
}
- /// Similar to timed_wait, but with a duration.
- template <typename Clock = std::chrono::steady_clock, typename Duration>
- bool timed_wait(const Duration& duration) {
- auto deadline = Clock::now() + duration;
- return timed_wait(deadline);
- }
-
- /// Similar to wait, but doesn't block the thread if it hasn't been posted.
- ///
- /// try_wait has the following semantics:
- /// - It is ok to call try_wait any number times on the same baton until
- /// try_wait reports that the baton has been posted.
- /// - It is ok to call timed_wait or wait on the same baton if try_wait
- /// reports that baton hasn't been posted.
- /// - If try_wait indicates that the baton has been posted, it is invalid to
- /// call wait, try_wait or timed_wait on the same baton without resetting
- ///
- /// @return true if baton has been posted, false othewise
- bool try_wait() const {
- auto s = state_.load(std::memory_order_acquire);
- assert(s == INIT || s == EARLY_DELIVERY);
- return s == EARLY_DELIVERY;
- }
-
- private:
- enum State : uint32_t {
- INIT = 0,
- EARLY_DELIVERY = 1,
- WAITING = 2,
- LATE_DELIVERY = 3,
- TIMED_OUT = 4
- };
-
- enum {
- // Must be positive. If multiple threads are actively using a
- // higher-level data structure that uses batons internally, it is
- // likely that the post() and wait() calls happen almost at the same
- // time. In this state, we lose big 50% of the time if the wait goes
- // to sleep immediately. On circa-2013 devbox hardware it costs about
- // 7 usec to FUTEX_WAIT and then be awoken (half the t/iter as the
- // posix_sem_pingpong test in BatonTests). We can improve our chances
- // of EARLY_DELIVERY by spinning for a bit, although we have to balance
- // this against the loss if we end up sleeping any way. Spins on this
- // hw take about 7 nanos (all but 0.5 nanos is the pause instruction).
- // We give ourself 300 spins, which is about 2 usec of waiting. As a
- // partial consolation, since we are using the pause instruction we
- // are giving a speed boost to the colocated hyperthread.
- PreBlockAttempts = 300,
- };
-
- // Spin for "some time" (see discussion on PreBlockAttempts) waiting
- // for a post.
- //
- // @return true if we received an early delivery during the wait,
- // false otherwise. If the function returns true then
- // state_ is guaranteed to be EARLY_DELIVERY
- bool spinWaitForEarlyDelivery() {
-
- static_assert(PreBlockAttempts > 0,
- "isn't this assert clearer than an uninitialized variable warning?");
- for (int i = 0; i < PreBlockAttempts; ++i) {
- if (try_wait()) {
- // hooray!
- return true;
- }
- // The pause instruction is the polite way to spin, but it doesn't
- // actually affect correctness to omit it if we don't have it.
- // Pausing donates the full capabilities of the current core to
- // its other hyperthreads for a dozen cycles or so
- asm_volatile_pause();
- }
-
- return false;
- }
-
detail::Futex<Atom> state_;
};