folly/synchronization/Baton.h

   1 /*
   2  * Copyright 2014-present Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <assert.h>
  20 #include <errno.h>
  21 #include <stdint.h>
  22 #include <atomic>
  23 #include <thread>
  24
  25 #include <folly/Likely.h>
  26 #include <folly/detail/Futex.h>
  27 #include <folly/detail/MemoryIdler.h>
  28 #include <folly/portability/Asm.h>
  29 #include <folly/synchronization/WaitOptions.h>
  30
  31 namespace folly {
  32
  33 /// A Baton allows a thread to block once and be awoken. Captures a
  34 /// single handoff, and during its lifecycle (from construction/reset
  35 /// to destruction/reset) a baton must either be post()ed and wait()ed
  36 /// exactly once each, or not at all.
  37 ///
  38 /// Baton includes no internal padding, and is only 4 bytes in size.
  39 /// Any alignment or padding to avoid false sharing is up to the user.
  40 ///
  41 /// This is basically a stripped-down semaphore that supports only a
  42 /// single call to sem_post and a single call to sem_wait.
  43 ///
  44 /// The non-blocking version (MayBlock == false) provides more speed
  45 /// by using only load acquire and store release operations in the
  46 /// critical path, at the cost of disallowing blocking.
  47 ///
  48 /// The current posix semaphore sem_t isn't too bad, but this provides
  49 /// more a bit more speed, inlining, smaller size, a guarantee that
  50 /// the implementation won't change, and compatibility with
  51 /// DeterministicSchedule.  By having a much more restrictive
  52 /// lifecycle we can also add a bunch of assertions that can help to
  53 /// catch race conditions ahead of time.
  54 template <bool MayBlock = true, template <typename> class Atom = std::atomic>
  55 class Baton {
  56  public:
  57   FOLLY_ALWAYS_INLINE static WaitOptions wait_options() {
  58     return {};
  59   }
  60
  61   constexpr Baton() noexcept : state_(INIT) {}
  62
  63   Baton(Baton const&) = delete;
  64   Baton& operator=(Baton const&) = delete;
  65
  66   /// It is an error to destroy a Baton on which a thread is currently
  67   /// wait()ing.  In practice this means that the waiter usually takes
  68   /// responsibility for destroying the Baton.
  69   ~Baton() noexcept {
  70     // The docblock for this function says that it can't be called when
  71     // there is a concurrent waiter.  We assume a strong version of this
  72     // requirement in which the caller must _know_ that this is true, they
  73     // are not allowed to be merely lucky.  If two threads are involved,
  74     // the destroying thread must actually have synchronized with the
  75     // waiting thread after wait() returned.  To convey causality the the
  76     // waiting thread must have used release semantics and the destroying
  77     // thread must have used acquire semantics for that communication,
  78     // so we are guaranteed to see the post-wait() value of state_,
  79     // which cannot be WAITING.
  80     //
  81     // Note that since we only care about a single memory location,
  82     // the only two plausible memory orders here are relaxed and seq_cst.
  83     assert(state_.load(std::memory_order_relaxed) != WAITING);
  84   }
  85
  86   FOLLY_ALWAYS_INLINE bool ready() const noexcept {
  87     auto s = state_.load(std::memory_order_acquire);
  88     assert(s == INIT || s == EARLY_DELIVERY);
  89     return LIKELY(s == EARLY_DELIVERY);
  90   }
  91
  92   /// Equivalent to destroying the Baton and creating a new one.  It is
  93   /// a bug to call this while there is a waiting thread, so in practice
  94   /// the waiter will be the one that resets the baton.
  95   void reset() noexcept {
  96     // See ~Baton for a discussion about why relaxed is okay here
  97     assert(state_.load(std::memory_order_relaxed) != WAITING);
  98
  99     // We use a similar argument to justify the use of a relaxed store
 100     // here.  Since both wait() and post() are required to be called
 101     // only once per lifetime, no thread can actually call those methods
 102     // correctly after a reset() unless it synchronizes with the thread
 103     // that performed the reset().  If a post() or wait() on another thread
 104     // didn't synchronize, then regardless of what operation we performed
 105     // here there would be a race on proper use of the Baton's spec
 106     // (although not on any particular load and store).  Put another way,
 107     // we don't need to synchronize here because anybody that might rely
 108     // on such synchronization is required by the baton rules to perform
 109     // an additional synchronization that has the desired effect anyway.
 110     //
 111     // There is actually a similar argument to be made about the
 112     // constructor, in which the fenceless constructor initialization
 113     // of state_ is piggybacked on whatever synchronization mechanism
 114     // distributes knowledge of the Baton's existence
 115     state_.store(INIT, std::memory_order_relaxed);
 116   }
 117
 118   /// Causes wait() to wake up.  For each lifetime of a Baton (where a
 119   /// lifetime starts at construction or reset() and ends at
 120   /// destruction or reset()) there can be at most one call to post(),
 121   /// in the single poster version.  Any thread may call post().
 122   void post() noexcept {
 123     if (!MayBlock) {
 124       /// Spin-only version
 125       ///
 126       assert([&] {
 127         auto state = state_.load(std::memory_order_relaxed);
 128         return (state == INIT || state == EARLY_DELIVERY);
 129       }());
 130       state_.store(EARLY_DELIVERY, std::memory_order_release);
 131       return;
 132     }
 133
 134     /// May-block versions
 135     ///
 136     uint32_t before = state_.load(std::memory_order_acquire);
 137
 138     assert(before == INIT || before == WAITING || before == TIMED_OUT);
 139
 140     if (before == INIT &&
 141         state_.compare_exchange_strong(
 142             before,
 143             EARLY_DELIVERY,
 144             std::memory_order_release,
 145             std::memory_order_relaxed)) {
 146       return;
 147     }
 148
 149     assert(before == WAITING || before == TIMED_OUT);
 150
 151     if (before == TIMED_OUT) {
 152       return;
 153     }
 154
 155     assert(before == WAITING);
 156     state_.store(LATE_DELIVERY, std::memory_order_release);
 157     state_.futexWake(1);
 158   }
 159
 160   /// Waits until post() has been called in the current Baton lifetime.
 161   /// May be called at most once during a Baton lifetime (construction
 162   /// |reset until destruction|reset).  If post is called before wait in
 163   /// the current lifetime then this method returns immediately.
 164   ///
 165   /// The restriction that there can be at most one wait() per lifetime
 166   /// could be relaxed somewhat without any perf or size regressions,
 167   /// but by making this condition very restrictive we can provide better
 168   /// checking in debug builds.
 169   FOLLY_ALWAYS_INLINE
 170   void wait(const WaitOptions& opt = wait_options()) noexcept {
 171     if (try_wait()) {
 172       return;
 173     }
 174
 175     waitSlow(opt);
 176   }
 177
 178   /// Similar to wait, but doesn't block the thread if it hasn't been posted.
 179   ///
 180   /// try_wait has the following semantics:
 181   /// - It is ok to call try_wait any number times on the same baton until
 182   ///   try_wait reports that the baton has been posted.
 183   /// - It is ok to call timed_wait or wait on the same baton if try_wait
 184   ///   reports that baton hasn't been posted.
 185   /// - If try_wait indicates that the baton has been posted, it is invalid to
 186   ///   call wait, try_wait or timed_wait on the same baton without resetting
 187   ///
 188   /// @return       true if baton has been posted, false othewise
 189   FOLLY_ALWAYS_INLINE bool try_wait() const noexcept {
 190     return ready();
 191   }
 192
 193   /// Similar to wait, but with a timeout. The thread is unblocked if the
 194   /// timeout expires.
 195   /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
 196   /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
 197   /// words, after try_wait_for the caller can't invoke
 198   /// wait/try_wait/try_wait_for/try_wait_until
 199   /// again on the same baton without resetting it.
 200   ///
 201   /// @param  timeout       Time until which the thread can block
 202   /// @return               true if the baton was posted to before timeout,
 203   ///                       false otherwise
 204   template <typename Rep, typename Period>
 205   FOLLY_ALWAYS_INLINE bool try_wait_for(
 206       const std::chrono::duration<Rep, Period>& timeout,
 207       const WaitOptions& opt = wait_options()) noexcept {
 208     if (try_wait()) {
 209       return true;
 210     }
 211
 212     auto deadline = std::chrono::steady_clock::now() + timeout;
 213     return tryWaitUntilSlow(deadline, opt);
 214   }
 215
 216   /// Similar to wait, but with a deadline. The thread is unblocked if the
 217   /// deadline expires.
 218   /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
 219   /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
 220   /// words, after try_wait_until the caller can't invoke
 221   /// wait/try_wait/try_wait_for/try_wait_until
 222   /// again on the same baton without resetting it.
 223   ///
 224   /// @param  deadline      Time until which the thread can block
 225   /// @return               true if the baton was posted to before deadline,
 226   ///                       false otherwise
 227   template <typename Clock, typename Duration>
 228   FOLLY_ALWAYS_INLINE bool try_wait_until(
 229       const std::chrono::time_point<Clock, Duration>& deadline,
 230       const WaitOptions& opt = wait_options()) noexcept {
 231     if (try_wait()) {
 232       return true;
 233     }
 234
 235     return tryWaitUntilSlow(deadline, opt);
 236   }
 237
 238   /// Alias to try_wait_for. Deprecated.
 239   template <typename Rep, typename Period>
 240   FOLLY_ALWAYS_INLINE bool timed_wait(
 241       const std::chrono::duration<Rep, Period>& timeout) noexcept {
 242     return try_wait_for(timeout);
 243   }
 244
 245   /// Alias to try_wait_until. Deprecated.
 246   template <typename Clock, typename Duration>
 247   FOLLY_ALWAYS_INLINE bool timed_wait(
 248       const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
 249     return try_wait_until(deadline);
 250   }
 251
 252  private:
 253   enum State : uint32_t {
 254     INIT = 0,
 255     EARLY_DELIVERY = 1,
 256     WAITING = 2,
 257     LATE_DELIVERY = 3,
 258     TIMED_OUT = 4,
 259   };
 260
 261   // Spin for "some time"
 262   //
 263   // @return       true if we received an early delivery during the wait,
 264   //               false otherwise. If the function returns true then
 265   //               state_ is guaranteed to be EARLY_DELIVERY
 266   template <typename Clock, typename Duration>
 267   bool spinWaitForEarlyDelivery(
 268       const std::chrono::time_point<Clock, Duration>& deadline,
 269       const WaitOptions& opt) noexcept {
 270     auto tbegin = Clock::now();
 271     while (true) {
 272       if (try_wait()) {
 273         return true;
 274       }
 275
 276       auto const tnow = Clock::now();
 277       if (tnow >= deadline) {
 278         return false;
 279       }
 280
 281       // Backward time discontinuity in Clock? revise pre_block starting point
 282       tbegin = std::min(tbegin, tnow);
 283       auto const dur = std::chrono::duration_cast<Duration>(tnow - tbegin);
 284       if (dur >= opt.spin_max()) {
 285         return false;
 286       }
 287
 288       // The pause instruction is the polite way to spin, but it doesn't
 289       // actually affect correctness to omit it if we don't have it.
 290       // Pausing donates the full capabilities of the current core to
 291       // its other hyperthreads for a dozen cycles or so
 292       asm_volatile_pause();
 293     }
 294   }
 295
 296   FOLLY_NOINLINE void waitSlow(const WaitOptions& opt) noexcept {
 297     auto const deadline = std::chrono::steady_clock::time_point::max();
 298     if (spinWaitForEarlyDelivery(deadline, opt)) {
 299       assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
 300       return;
 301     }
 302
 303     if (!MayBlock) {
 304       while (!try_wait()) {
 305         std::this_thread::yield();
 306       }
 307       return;
 308     }
 309
 310     // guess we have to block :(
 311     uint32_t expected = INIT;
 312     if (!state_.compare_exchange_strong(expected, WAITING)) {
 313       // CAS failed, last minute reprieve
 314       assert(expected == EARLY_DELIVERY);
 315       return;
 316     }
 317
 318     while (true) {
 319       detail::MemoryIdler::futexWait(state_, WAITING);
 320
 321       // state_ is the truth even if FUTEX_WAIT reported a matching
 322       // FUTEX_WAKE, since we aren't using type-stable storage and we
 323       // don't guarantee reuse.  The scenario goes like this: thread
 324       // A's last touch of a Baton is a call to wake(), which stores
 325       // LATE_DELIVERY and gets an unlucky context switch before delivering
 326       // the corresponding futexWake.  Thread B sees LATE_DELIVERY
 327       // without consuming a futex event, because it calls futexWait
 328       // with an expected value of WAITING and hence doesn't go to sleep.
 329       // B returns, so the Baton's memory is reused and becomes another
 330       // Baton (or a reuse of this one).  B calls futexWait on the new
 331       // Baton lifetime, then A wakes up and delivers a spurious futexWake
 332       // to the same memory location.  B's futexWait will then report a
 333       // consumed wake event even though state_ is still WAITING.
 334       //
 335       // It would be possible to add an extra state_ dance to communicate
 336       // that the futexWake has been sent so that we can be sure to consume
 337       // it before returning, but that would be a perf and complexity hit.
 338       uint32_t s = state_.load(std::memory_order_acquire);
 339       assert(s == WAITING || s == LATE_DELIVERY);
 340
 341       if (s == LATE_DELIVERY) {
 342         return;
 343       }
 344       // retry
 345     }
 346   }
 347
 348   template <typename Clock, typename Duration>
 349   FOLLY_NOINLINE bool tryWaitUntilSlow(
 350       const std::chrono::time_point<Clock, Duration>& deadline,
 351       const WaitOptions& opt) noexcept {
 352     if (spinWaitForEarlyDelivery(deadline, opt)) {
 353       assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
 354       return true;
 355     }
 356
 357     if (!MayBlock) {
 358       while (true) {
 359         if (try_wait()) {
 360           return true;
 361         }
 362         if (Clock::now() >= deadline) {
 363           return false;
 364         }
 365         std::this_thread::yield();
 366       }
 367     }
 368
 369     // guess we have to block :(
 370     uint32_t expected = INIT;
 371     if (!state_.compare_exchange_strong(expected, WAITING)) {
 372       // CAS failed, last minute reprieve
 373       assert(expected == EARLY_DELIVERY);
 374       return true;
 375     }
 376
 377     while (true) {
 378       auto rv = state_.futexWaitUntil(WAITING, deadline);
 379       if (rv == folly::detail::FutexResult::TIMEDOUT) {
 380         state_.store(TIMED_OUT, std::memory_order_release);
 381         return false;
 382       }
 383
 384       uint32_t s = state_.load(std::memory_order_acquire);
 385       assert(s == WAITING || s == LATE_DELIVERY);
 386       if (s == LATE_DELIVERY) {
 387         return true;
 388       }
 389     }
 390   }
 391
 392   detail::Futex<Atom> state_;
 393 };
 394
 395 } // namespace folly