folly/synchronization/Baton.h

   1 /*
   2  * Copyright 2014-present Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <assert.h>
  20 #include <errno.h>
  21 #include <stdint.h>
  22 #include <atomic>
  23 #include <thread>
  24
  25 #include <folly/Likely.h>
  26 #include <folly/detail/Futex.h>
  27 #include <folly/detail/MemoryIdler.h>
  28 #include <folly/portability/Asm.h>
  29 #include <folly/synchronization/WaitOptions.h>
  30
  31 namespace folly {
  32
  33 /// A Baton allows a thread to block once and be awoken. Captures a
  34 /// single handoff, and during its lifecycle (from construction/reset
  35 /// to destruction/reset) a baton must either be post()ed and wait()ed
  36 /// exactly once each, or not at all.
  37 ///
  38 /// Baton includes no internal padding, and is only 4 bytes in size.
  39 /// Any alignment or padding to avoid false sharing is up to the user.
  40 ///
  41 /// This is basically a stripped-down semaphore that supports only a
  42 /// single call to sem_post and a single call to sem_wait.
  43 ///
  44 /// The non-blocking version (MayBlock == false) provides more speed
  45 /// by using only load acquire and store release operations in the
  46 /// critical path, at the cost of disallowing blocking.
  47 ///
  48 /// The current posix semaphore sem_t isn't too bad, but this provides
  49 /// more a bit more speed, inlining, smaller size, a guarantee that
  50 /// the implementation won't change, and compatibility with
  51 /// DeterministicSchedule.  By having a much more restrictive
  52 /// lifecycle we can also add a bunch of assertions that can help to
  53 /// catch race conditions ahead of time.
  54 template <bool MayBlock = true, template <typename> class Atom = std::atomic>
  55 class Baton {
  56  public:
  57   FOLLY_ALWAYS_INLINE static WaitOptions wait_options() {
  58     return {};
  59   }
  60
  61   constexpr Baton() noexcept : state_(INIT) {}
  62
  63   Baton(Baton const&) = delete;
  64   Baton& operator=(Baton const&) = delete;
  65
  66   /// It is an error to destroy a Baton on which a thread is currently
  67   /// wait()ing.  In practice this means that the waiter usually takes
  68   /// responsibility for destroying the Baton.
  69   ~Baton() noexcept {
  70     // The docblock for this function says that it can't be called when
  71     // there is a concurrent waiter.  We assume a strong version of this
  72     // requirement in which the caller must _know_ that this is true, they
  73     // are not allowed to be merely lucky.  If two threads are involved,
  74     // the destroying thread must actually have synchronized with the
  75     // waiting thread after wait() returned.  To convey causality the the
  76     // waiting thread must have used release semantics and the destroying
  77     // thread must have used acquire semantics for that communication,
  78     // so we are guaranteed to see the post-wait() value of state_,
  79     // which cannot be WAITING.
  80     //
  81     // Note that since we only care about a single memory location,
  82     // the only two plausible memory orders here are relaxed and seq_cst.
  83     assert(state_.load(std::memory_order_relaxed) != WAITING);
  84   }
  85
  86   FOLLY_ALWAYS_INLINE bool ready() const noexcept {
  87     auto s = state_.load(std::memory_order_acquire);
  88     assert(s == INIT || s == EARLY_DELIVERY);
  89     return LIKELY(s == EARLY_DELIVERY);
  90   }
  91
  92   /// Equivalent to destroying the Baton and creating a new one.  It is
  93   /// a bug to call this while there is a waiting thread, so in practice
  94   /// the waiter will be the one that resets the baton.
  95   void reset() noexcept {
  96     // See ~Baton for a discussion about why relaxed is okay here
  97     assert(state_.load(std::memory_order_relaxed) != WAITING);
  98
  99     // We use a similar argument to justify the use of a relaxed store
 100     // here.  Since both wait() and post() are required to be called
 101     // only once per lifetime, no thread can actually call those methods
 102     // correctly after a reset() unless it synchronizes with the thread
 103     // that performed the reset().  If a post() or wait() on another thread
 104     // didn't synchronize, then regardless of what operation we performed
 105     // here there would be a race on proper use of the Baton's spec
 106     // (although not on any particular load and store).  Put another way,
 107     // we don't need to synchronize here because anybody that might rely
 108     // on such synchronization is required by the baton rules to perform
 109     // an additional synchronization that has the desired effect anyway.
 110     //
 111     // There is actually a similar argument to be made about the
 112     // constructor, in which the fenceless constructor initialization
 113     // of state_ is piggybacked on whatever synchronization mechanism
 114     // distributes knowledge of the Baton's existence
 115     state_.store(INIT, std::memory_order_relaxed);
 116   }
 117
 118   /// Causes wait() to wake up.  For each lifetime of a Baton (where a
 119   /// lifetime starts at construction or reset() and ends at
 120   /// destruction or reset()) there can be at most one call to post(),
 121   /// in the single poster version.  Any thread may call post().
 122   void post() noexcept {
 123     if (!MayBlock) {
 124       /// Spin-only version
 125       ///
 126       assert(
 127           ((1 << state_.load(std::memory_order_relaxed)) &
 128            ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0);
 129       state_.store(EARLY_DELIVERY, std::memory_order_release);
 130       return;
 131     }
 132
 133     /// May-block versions
 134     ///
 135     uint32_t before = state_.load(std::memory_order_acquire);
 136
 137     assert(before == INIT || before == WAITING || before == TIMED_OUT);
 138
 139     if (before == INIT &&
 140         state_.compare_exchange_strong(
 141             before,
 142             EARLY_DELIVERY,
 143             std::memory_order_release,
 144             std::memory_order_relaxed)) {
 145       return;
 146     }
 147
 148     assert(before == WAITING || before == TIMED_OUT);
 149
 150     if (before == TIMED_OUT) {
 151       return;
 152     }
 153
 154     assert(before == WAITING);
 155     state_.store(LATE_DELIVERY, std::memory_order_release);
 156     state_.futexWake(1);
 157   }
 158
 159   /// Waits until post() has been called in the current Baton lifetime.
 160   /// May be called at most once during a Baton lifetime (construction
 161   /// |reset until destruction|reset).  If post is called before wait in
 162   /// the current lifetime then this method returns immediately.
 163   ///
 164   /// The restriction that there can be at most one wait() per lifetime
 165   /// could be relaxed somewhat without any perf or size regressions,
 166   /// but by making this condition very restrictive we can provide better
 167   /// checking in debug builds.
 168   FOLLY_ALWAYS_INLINE
 169   void wait(const WaitOptions& opt = wait_options()) noexcept {
 170     if (try_wait()) {
 171       return;
 172     }
 173
 174     waitSlow(opt);
 175   }
 176
 177   /// Similar to wait, but doesn't block the thread if it hasn't been posted.
 178   ///
 179   /// try_wait has the following semantics:
 180   /// - It is ok to call try_wait any number times on the same baton until
 181   ///   try_wait reports that the baton has been posted.
 182   /// - It is ok to call timed_wait or wait on the same baton if try_wait
 183   ///   reports that baton hasn't been posted.
 184   /// - If try_wait indicates that the baton has been posted, it is invalid to
 185   ///   call wait, try_wait or timed_wait on the same baton without resetting
 186   ///
 187   /// @return       true if baton has been posted, false othewise
 188   FOLLY_ALWAYS_INLINE bool try_wait() const noexcept {
 189     return ready();
 190   }
 191
 192   /// Similar to wait, but with a timeout. The thread is unblocked if the
 193   /// timeout expires.
 194   /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
 195   /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
 196   /// words, after try_wait_for the caller can't invoke
 197   /// wait/try_wait/try_wait_for/try_wait_until
 198   /// again on the same baton without resetting it.
 199   ///
 200   /// @param  timeout       Time until which the thread can block
 201   /// @return               true if the baton was posted to before timeout,
 202   ///                       false otherwise
 203   template <typename Rep, typename Period>
 204   FOLLY_ALWAYS_INLINE bool try_wait_for(
 205       const std::chrono::duration<Rep, Period>& timeout,
 206       const WaitOptions& opt = wait_options()) noexcept {
 207     if (try_wait()) {
 208       return true;
 209     }
 210
 211     auto deadline = std::chrono::steady_clock::now() + timeout;
 212     return tryWaitUntilSlow(deadline, opt);
 213   }
 214
 215   /// Similar to wait, but with a deadline. The thread is unblocked if the
 216   /// deadline expires.
 217   /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
 218   /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
 219   /// words, after try_wait_until the caller can't invoke
 220   /// wait/try_wait/try_wait_for/try_wait_until
 221   /// again on the same baton without resetting it.
 222   ///
 223   /// @param  deadline      Time until which the thread can block
 224   /// @return               true if the baton was posted to before deadline,
 225   ///                       false otherwise
 226   template <typename Clock, typename Duration>
 227   FOLLY_ALWAYS_INLINE bool try_wait_until(
 228       const std::chrono::time_point<Clock, Duration>& deadline,
 229       const WaitOptions& opt = wait_options()) noexcept {
 230     if (try_wait()) {
 231       return true;
 232     }
 233
 234     return tryWaitUntilSlow(deadline, opt);
 235   }
 236
 237   /// Alias to try_wait_for. Deprecated.
 238   template <typename Rep, typename Period>
 239   FOLLY_ALWAYS_INLINE bool timed_wait(
 240       const std::chrono::duration<Rep, Period>& timeout) noexcept {
 241     return try_wait_for(timeout);
 242   }
 243
 244   /// Alias to try_wait_until. Deprecated.
 245   template <typename Clock, typename Duration>
 246   FOLLY_ALWAYS_INLINE bool timed_wait(
 247       const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
 248     return try_wait_until(deadline);
 249   }
 250
 251  private:
 252   enum State : uint32_t {
 253     INIT = 0,
 254     EARLY_DELIVERY = 1,
 255     WAITING = 2,
 256     LATE_DELIVERY = 3,
 257     TIMED_OUT = 4,
 258   };
 259
 260   // Spin for "some time"
 261   //
 262   // @return       true if we received an early delivery during the wait,
 263   //               false otherwise. If the function returns true then
 264   //               state_ is guaranteed to be EARLY_DELIVERY
 265   template <typename Clock, typename Duration>
 266   bool spinWaitForEarlyDelivery(
 267       const std::chrono::time_point<Clock, Duration>& deadline,
 268       const WaitOptions& opt) noexcept {
 269     auto tbegin = Clock::now();
 270     while (true) {
 271       if (try_wait()) {
 272         return true;
 273       }
 274
 275       auto const tnow = Clock::now();
 276       if (tnow >= deadline) {
 277         return false;
 278       }
 279
 280       // Backward time discontinuity in Clock? revise pre_block starting point
 281       tbegin = std::min(tbegin, tnow);
 282       auto const dur = std::chrono::duration_cast<Duration>(tnow - tbegin);
 283       if (dur >= opt.spin_max()) {
 284         return false;
 285       }
 286
 287       // The pause instruction is the polite way to spin, but it doesn't
 288       // actually affect correctness to omit it if we don't have it.
 289       // Pausing donates the full capabilities of the current core to
 290       // its other hyperthreads for a dozen cycles or so
 291       asm_volatile_pause();
 292     }
 293   }
 294
 295   FOLLY_NOINLINE void waitSlow(const WaitOptions& opt) noexcept {
 296     auto const deadline = std::chrono::steady_clock::time_point::max();
 297     if (spinWaitForEarlyDelivery(deadline, opt)) {
 298       assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
 299       return;
 300     }
 301
 302     if (!MayBlock) {
 303       while (!try_wait()) {
 304         std::this_thread::yield();
 305       }
 306       return;
 307     }
 308
 309     // guess we have to block :(
 310     uint32_t expected = INIT;
 311     if (!state_.compare_exchange_strong(expected, WAITING)) {
 312       // CAS failed, last minute reprieve
 313       assert(expected == EARLY_DELIVERY);
 314       return;
 315     }
 316
 317     while (true) {
 318       detail::MemoryIdler::futexWait(state_, WAITING);
 319
 320       // state_ is the truth even if FUTEX_WAIT reported a matching
 321       // FUTEX_WAKE, since we aren't using type-stable storage and we
 322       // don't guarantee reuse.  The scenario goes like this: thread
 323       // A's last touch of a Baton is a call to wake(), which stores
 324       // LATE_DELIVERY and gets an unlucky context switch before delivering
 325       // the corresponding futexWake.  Thread B sees LATE_DELIVERY
 326       // without consuming a futex event, because it calls futexWait
 327       // with an expected value of WAITING and hence doesn't go to sleep.
 328       // B returns, so the Baton's memory is reused and becomes another
 329       // Baton (or a reuse of this one).  B calls futexWait on the new
 330       // Baton lifetime, then A wakes up and delivers a spurious futexWake
 331       // to the same memory location.  B's futexWait will then report a
 332       // consumed wake event even though state_ is still WAITING.
 333       //
 334       // It would be possible to add an extra state_ dance to communicate
 335       // that the futexWake has been sent so that we can be sure to consume
 336       // it before returning, but that would be a perf and complexity hit.
 337       uint32_t s = state_.load(std::memory_order_acquire);
 338       assert(s == WAITING || s == LATE_DELIVERY);
 339
 340       if (s == LATE_DELIVERY) {
 341         return;
 342       }
 343       // retry
 344     }
 345   }
 346
 347   template <typename Clock, typename Duration>
 348   FOLLY_NOINLINE bool tryWaitUntilSlow(
 349       const std::chrono::time_point<Clock, Duration>& deadline,
 350       const WaitOptions& opt) noexcept {
 351     if (spinWaitForEarlyDelivery(deadline, opt)) {
 352       assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
 353       return true;
 354     }
 355
 356     if (!MayBlock) {
 357       while (true) {
 358         if (try_wait()) {
 359           return true;
 360         }
 361         if (Clock::now() >= deadline) {
 362           return false;
 363         }
 364         std::this_thread::yield();
 365       }
 366     }
 367
 368     // guess we have to block :(
 369     uint32_t expected = INIT;
 370     if (!state_.compare_exchange_strong(expected, WAITING)) {
 371       // CAS failed, last minute reprieve
 372       assert(expected == EARLY_DELIVERY);
 373       return true;
 374     }
 375
 376     while (true) {
 377       auto rv = state_.futexWaitUntil(WAITING, deadline);
 378       if (rv == folly::detail::FutexResult::TIMEDOUT) {
 379         state_.store(TIMED_OUT, std::memory_order_release);
 380         return false;
 381       }
 382
 383       uint32_t s = state_.load(std::memory_order_acquire);
 384       assert(s == WAITING || s == LATE_DELIVERY);
 385       if (s == LATE_DELIVERY) {
 386         return true;
 387       }
 388     }
 389   }
 390
 391   detail::Futex<Atom> state_;
 392 };
 393
 394 } // namespace folly