From: Yedidya Feldblum <yfeldblum@fb.com>
Date: Wed, 6 Dec 2017 06:03:40 +0000 (-0800)
Subject: Move folly/Baton.h to folly/synchronization/
X-Git-Tag: v2017.12.11.00~22
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=b3dc093ef094d6d97c10755e60392124828c5103;p=folly.git

Move folly/Baton.h to folly/synchronization/

Summary: [Folly] Move `folly/Baton.h` to `folly/synchronization/`.

Reviewed By: phoad, Orvid

Differential Revision: D6490282

fbshipit-source-id: 66e2d25ffe3275d576b97b81c1987709000f6649
---

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ac34e7d4..fc0084dc 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -537,6 +537,7 @@ if (BUILD_TESTS)
       TEST timeseries_test SOURCES TimeseriesTest.cpp
 
     DIRECTORY synchronization/test/
+      TEST baton_test SOURCES BatonTest.cpp
       TEST call_once_test SOURCES CallOnceTest.cpp
       TEST lifo_sem_test SOURCES LifoSemTests.cpp
 
@@ -558,7 +559,6 @@ if (BUILD_TESTS)
       TEST atomic_linked_list_test SOURCES AtomicLinkedListTest.cpp
       TEST atomic_struct_test SOURCES AtomicStructTest.cpp
       TEST atomic_unordered_map_test SOURCES AtomicUnorderedMapTest.cpp
-      TEST baton_test SOURCES BatonTest.cpp
       TEST bit_iterator_test SOURCES BitIteratorTest.cpp
       TEST bits_test SOURCES BitsTest.cpp
       TEST cacheline_padded_test SOURCES CachelinePaddedTest.cpp
diff --git a/folly/Baton.h b/folly/Baton.h
deleted file mode 100644
index 83323915..00000000
--- a/folly/Baton.h
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * Copyright 2017 Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <assert.h>
-#include <errno.h>
-#include <stdint.h>
-#include <atomic>
-#include <thread>
-
-#include <folly/detail/Futex.h>
-#include <folly/detail/MemoryIdler.h>
-#include <folly/portability/Asm.h>
-
-namespace folly {
-
-/// A Baton allows a thread to block once and be awoken. The single
-/// poster version (with SinglePoster == true) captures a single
-/// handoff, and during its lifecycle (from construction/reset to
-/// destruction/reset) a baton must either be post()ed and wait()ed
-/// exactly once each, or not at all.
-///
-/// The multi-poster version (SinglePoster == false) allows multiple
-/// concurrent handoff attempts, the first of which completes the
-/// handoff and the rest if any are idempotent.
-///
-/// Baton includes no internal padding, and is only 4 bytes in size.
-/// Any alignment or padding to avoid false sharing is up to the user.
-///
-/// This is basically a stripped-down semaphore that supports (only a
-/// single call to sem_post, when SinglePoster == true) and a single
-/// call to sem_wait.
-///
-/// The non-blocking version (Blocking == false) provides more speed
-/// by using only load acquire and store release operations in the
-/// critical path, at the cost of disallowing blocking and timing out.
-///
-/// The current posix semaphore sem_t isn't too bad, but this provides
-/// more a bit more speed, inlining, smaller size, a guarantee that
-/// the implementation won't change, and compatibility with
-/// DeterministicSchedule.  By having a much more restrictive
-/// lifecycle we can also add a bunch of assertions that can help to
-/// catch race conditions ahead of time.
-template <
-    template <typename> class Atom = std::atomic,
-    bool SinglePoster = true, //  single vs multiple posters
-    bool Blocking = true> // blocking vs spinning
-struct Baton {
-  constexpr Baton() : state_(INIT) {}
-
-  Baton(Baton const&) = delete;
-  Baton& operator=(Baton const&) = delete;
-
-  /// It is an error to destroy a Baton on which a thread is currently
-  /// wait()ing.  In practice this means that the waiter usually takes
-  /// responsibility for destroying the Baton.
-  ~Baton() {
-    // The docblock for this function says that it can't be called when
-    // there is a concurrent waiter.  We assume a strong version of this
-    // requirement in which the caller must _know_ that this is true, they
-    // are not allowed to be merely lucky.  If two threads are involved,
-    // the destroying thread must actually have synchronized with the
-    // waiting thread after wait() returned.  To convey causality the the
-    // waiting thread must have used release semantics and the destroying
-    // thread must have used acquire semantics for that communication,
-    // so we are guaranteed to see the post-wait() value of state_,
-    // which cannot be WAITING.
-    //
-    // Note that since we only care about a single memory location,
-    // the only two plausible memory orders here are relaxed and seq_cst.
-    assert(state_.load(std::memory_order_relaxed) != WAITING);
-  }
-
-  /// Equivalent to destroying the Baton and creating a new one.  It is
-  /// a bug to call this while there is a waiting thread, so in practice
-  /// the waiter will be the one that resets the baton.
-  void reset() {
-    // See ~Baton for a discussion about why relaxed is okay here
-    assert(state_.load(std::memory_order_relaxed) != WAITING);
-
-    // We use a similar argument to justify the use of a relaxed store
-    // here.  Since both wait() and post() are required to be called
-    // only once per lifetime, no thread can actually call those methods
-    // correctly after a reset() unless it synchronizes with the thread
-    // that performed the reset().  If a post() or wait() on another thread
-    // didn't synchronize, then regardless of what operation we performed
-    // here there would be a race on proper use of the Baton's spec
-    // (although not on any particular load and store).  Put another way,
-    // we don't need to synchronize here because anybody that might rely
-    // on such synchronization is required by the baton rules to perform
-    // an additional synchronization that has the desired effect anyway.
-    //
-    // There is actually a similar argument to be made about the
-    // constructor, in which the fenceless constructor initialization
-    // of state_ is piggybacked on whatever synchronization mechanism
-    // distributes knowledge of the Baton's existence
-    state_.store(INIT, std::memory_order_relaxed);
-  }
-
-  /// Causes wait() to wake up.  For each lifetime of a Baton (where a
-  /// lifetime starts at construction or reset() and ends at
-  /// destruction or reset()) there can be at most one call to post(),
-  /// in the single poster version.  Any thread may call post().
-  void post() {
-    if (!Blocking) {
-      /// Non-blocking version
-      ///
-      assert([&] {
-        auto state = state_.load(std::memory_order_relaxed);
-        return (state == INIT || state == EARLY_DELIVERY);
-      }());
-      state_.store(EARLY_DELIVERY, std::memory_order_release);
-      return;
-    }
-
-    /// Blocking versions
-    ///
-    if (SinglePoster) {
-      /// Single poster version
-      ///
-      uint32_t before = state_.load(std::memory_order_acquire);
-
-      assert(before == INIT || before == WAITING || before == TIMED_OUT);
-
-      if (before == INIT &&
-          state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
-        return;
-      }
-
-      assert(before == WAITING || before == TIMED_OUT);
-
-      if (before == TIMED_OUT) {
-        return;
-      }
-
-      assert(before == WAITING);
-      state_.store(LATE_DELIVERY, std::memory_order_release);
-      state_.futexWake(1);
-    } else {
-      /// Multi-poster version
-      ///
-      while (true) {
-        uint32_t before = state_.load(std::memory_order_acquire);
-
-        if (before == INIT &&
-            state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
-          return;
-        }
-
-        if (before == TIMED_OUT) {
-          return;
-        }
-
-        if (before == EARLY_DELIVERY || before == LATE_DELIVERY) {
-          // The reason for not simply returning (without the following
-          // atomic operation) is to avoid the following case:
-          //
-          //  T1:             T2:             T3:
-          //  local1.post();  local2.post();  global.wait();
-          //  global.post();  global.post();  local1.try_wait() == true;
-          //                                  local2.try_wait() == false;
-          //
-          if (state_.fetch_add(0) != before) {
-            continue;
-          }
-          return;
-        }
-
-        assert(before == WAITING);
-        if (!state_.compare_exchange_weak(before, LATE_DELIVERY)) {
-          continue;
-        }
-        state_.futexWake(1);
-        return;
-      }
-    }
-  }
-
-  /// Waits until post() has been called in the current Baton lifetime.
-  /// May be called at most once during a Baton lifetime (construction
-  /// |reset until destruction|reset).  If post is called before wait in
-  /// the current lifetime then this method returns immediately.
-  ///
-  /// The restriction that there can be at most one wait() per lifetime
-  /// could be relaxed somewhat without any perf or size regressions,
-  /// but by making this condition very restrictive we can provide better
-  /// checking in debug builds.
-  void wait() {
-    if (spinWaitForEarlyDelivery()) {
-      assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
-      return;
-    }
-
-    if (!Blocking) {
-      while (!try_wait()) {
-        std::this_thread::yield();
-      }
-      return;
-    }
-
-    // guess we have to block :(
-    uint32_t expected = INIT;
-    if (!state_.compare_exchange_strong(expected, WAITING)) {
-      // CAS failed, last minute reprieve
-      assert(expected == EARLY_DELIVERY);
-      return;
-    }
-
-    while (true) {
-      detail::MemoryIdler::futexWait(state_, WAITING);
-
-      // state_ is the truth even if FUTEX_WAIT reported a matching
-      // FUTEX_WAKE, since we aren't using type-stable storage and we
-      // don't guarantee reuse.  The scenario goes like this: thread
-      // A's last touch of a Baton is a call to wake(), which stores
-      // LATE_DELIVERY and gets an unlucky context switch before delivering
-      // the corresponding futexWake.  Thread B sees LATE_DELIVERY
-      // without consuming a futex event, because it calls futexWait
-      // with an expected value of WAITING and hence doesn't go to sleep.
-      // B returns, so the Baton's memory is reused and becomes another
-      // Baton (or a reuse of this one).  B calls futexWait on the new
-      // Baton lifetime, then A wakes up and delivers a spurious futexWake
-      // to the same memory location.  B's futexWait will then report a
-      // consumed wake event even though state_ is still WAITING.
-      //
-      // It would be possible to add an extra state_ dance to communicate
-      // that the futexWake has been sent so that we can be sure to consume
-      // it before returning, but that would be a perf and complexity hit.
-      uint32_t s = state_.load(std::memory_order_acquire);
-      assert(s == WAITING || s == LATE_DELIVERY);
-
-      if (s == LATE_DELIVERY) {
-        return;
-      }
-      // retry
-    }
-  }
-
-  /// Similar to wait, but with a timeout. The thread is unblocked if the
-  /// timeout expires.
-  /// Note: Only a single call to timed_wait/wait is allowed during a baton's
-  /// life-cycle (from construction/reset to destruction/reset). In other
-  /// words, after timed_wait the caller can't invoke wait/timed_wait/try_wait
-  /// again on the same baton without resetting it.
-  ///
-  /// @param  deadline      Time until which the thread can block
-  /// @return               true if the baton was posted to before timeout,
-  ///                       false otherwise
-  template <typename Clock, typename Duration = typename Clock::duration>
-  bool timed_wait(const std::chrono::time_point<Clock,Duration>& deadline) {
-    static_assert(Blocking, "Non-blocking Baton does not support timed wait.");
-
-    if (spinWaitForEarlyDelivery()) {
-      assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
-      return true;
-    }
-
-    // guess we have to block :(
-    uint32_t expected = INIT;
-    if (!state_.compare_exchange_strong(expected, WAITING)) {
-      // CAS failed, last minute reprieve
-      assert(expected == EARLY_DELIVERY);
-      return true;
-    }
-
-    while (true) {
-      auto rv = state_.futexWaitUntil(WAITING, deadline);
-      if (rv == folly::detail::FutexResult::TIMEDOUT) {
-        state_.store(TIMED_OUT, std::memory_order_release);
-        return false;
-      }
-
-      uint32_t s = state_.load(std::memory_order_acquire);
-      assert(s == WAITING || s == LATE_DELIVERY);
-      if (s == LATE_DELIVERY) {
-        return true;
-      }
-    }
-  }
-
-  /// Similar to timed_wait, but with a duration.
-  template <typename Clock = std::chrono::steady_clock, typename Duration>
-  bool timed_wait(const Duration& duration) {
-    auto deadline = Clock::now() + duration;
-    return timed_wait(deadline);
-  }
-
-  /// Similar to wait, but doesn't block the thread if it hasn't been posted.
-  ///
-  /// try_wait has the following semantics:
-  /// - It is ok to call try_wait any number times on the same baton until
-  ///   try_wait reports that the baton has been posted.
-  /// - It is ok to call timed_wait or wait on the same baton if try_wait
-  ///   reports that baton hasn't been posted.
-  /// - If try_wait indicates that the baton has been posted, it is invalid to
-  ///   call wait, try_wait or timed_wait on the same baton without resetting
-  ///
-  /// @return       true if baton has been posted, false othewise
-  bool try_wait() const {
-    auto s = state_.load(std::memory_order_acquire);
-    assert(s == INIT || s == EARLY_DELIVERY);
-    return s == EARLY_DELIVERY;
-  }
-
- private:
-  enum State : uint32_t {
-    INIT = 0,
-    EARLY_DELIVERY = 1,
-    WAITING = 2,
-    LATE_DELIVERY = 3,
-    TIMED_OUT = 4
-  };
-
-  enum {
-    // Must be positive.  If multiple threads are actively using a
-    // higher-level data structure that uses batons internally, it is
-    // likely that the post() and wait() calls happen almost at the same
-    // time.  In this state, we lose big 50% of the time if the wait goes
-    // to sleep immediately.  On circa-2013 devbox hardware it costs about
-    // 7 usec to FUTEX_WAIT and then be awoken (half the t/iter as the
-    // posix_sem_pingpong test in BatonTests).  We can improve our chances
-    // of EARLY_DELIVERY by spinning for a bit, although we have to balance
-    // this against the loss if we end up sleeping any way.  Spins on this
-    // hw take about 7 nanos (all but 0.5 nanos is the pause instruction).
-    // We give ourself 300 spins, which is about 2 usec of waiting.  As a
-    // partial consolation, since we are using the pause instruction we
-    // are giving a speed boost to the colocated hyperthread.
-    PreBlockAttempts = 300,
-  };
-
-  // Spin for "some time" (see discussion on PreBlockAttempts) waiting
-  // for a post.
-  //
-  // @return       true if we received an early delivery during the wait,
-  //               false otherwise. If the function returns true then
-  //               state_ is guaranteed to be EARLY_DELIVERY
-  bool spinWaitForEarlyDelivery() {
-
-    static_assert(PreBlockAttempts > 0,
-        "isn't this assert clearer than an uninitialized variable warning?");
-    for (int i = 0; i < PreBlockAttempts; ++i) {
-      if (try_wait()) {
-        // hooray!
-        return true;
-      }
-      // The pause instruction is the polite way to spin, but it doesn't
-      // actually affect correctness to omit it if we don't have it.
-      // Pausing donates the full capabilities of the current core to
-      // its other hyperthreads for a dozen cycles or so
-      asm_volatile_pause();
-    }
-
-    return false;
-  }
-
-  detail::Futex<Atom> state_;
-};
-
-} // namespace folly
diff --git a/folly/Makefile.am b/folly/Makefile.am
index 4ca4388d..98547554 100644
--- a/folly/Makefile.am
+++ b/folly/Makefile.am
@@ -37,7 +37,6 @@ nobase_follyinclude_HEADERS = \
 	AtomicLinkedList.h \
 	AtomicStruct.h \
 	AtomicUnorderedMap.h \
-	Baton.h \
 	Benchmark.h \
 	Bits.h \
 	CachelinePadded.h \
@@ -433,6 +432,7 @@ nobase_follyinclude_HEADERS = \
 	stats/TimeseriesHistogram-defs.h \
 	stats/TimeseriesHistogram.h \
 	synchronization/AsymmetricMemoryBarrier.h \
+	synchronization/Baton.h \
 	synchronization/CallOnce.h \
 	synchronization/LifoSem.h \
 	synchronization/detail/AtomicUtils.h \
diff --git a/folly/Singleton.h b/folly/Singleton.h
index 01a9d5d1..48251b66 100644
--- a/folly/Singleton.h
+++ b/folly/Singleton.h
@@ -121,7 +121,6 @@
 // should call reenableInstances.
 
 #pragma once
-#include <folly/Baton.h>
 #include <folly/Demangle.h>
 #include <folly/Exception.h>
 #include <folly/Executor.h>
@@ -131,6 +130,7 @@
 #include <folly/detail/StaticSingletonManager.h>
 #include <folly/experimental/ReadMostlySharedPtr.h>
 #include <folly/hash/Hash.h>
+#include <folly/synchronization/Baton.h>
 
 #include <algorithm>
 #include <atomic>
diff --git a/folly/executors/ThreadPoolExecutor.h b/folly/executors/ThreadPoolExecutor.h
index a92726ef..0e5bcc17 100644
--- a/folly/executors/ThreadPoolExecutor.h
+++ b/folly/executors/ThreadPoolExecutor.h
@@ -15,7 +15,6 @@
  */
 
 #pragma once
-#include <folly/Baton.h>
 #include <folly/Executor.h>
 #include <folly/Memory.h>
 #include <folly/RWSpinLock.h>
@@ -23,6 +22,7 @@
 #include <folly/executors/task_queue/LifoSemMPMCQueue.h>
 #include <folly/executors/thread_factory/NamedThreadFactory.h>
 #include <folly/io/async/Request.h>
+#include <folly/synchronization/Baton.h>
 
 #include <algorithm>
 #include <mutex>
diff --git a/folly/executors/task_queue/test/UnboundedBlockingQueueTest.cpp b/folly/executors/task_queue/test/UnboundedBlockingQueueTest.cpp
index a7b1efd5..c3e7d0d4 100644
--- a/folly/executors/task_queue/test/UnboundedBlockingQueueTest.cpp
+++ b/folly/executors/task_queue/test/UnboundedBlockingQueueTest.cpp
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 #include <folly/executors/task_queue/UnboundedBlockingQueue.h>
-#include <folly/Baton.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 #include <thread>
 
 using namespace folly;
diff --git a/folly/executors/test/ExecutorTest.cpp b/folly/executors/test/ExecutorTest.cpp
index 2bbc56a1..e017ca78 100644
--- a/folly/executors/test/ExecutorTest.cpp
+++ b/folly/executors/test/ExecutorTest.cpp
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#include <folly/Baton.h>
 #include <folly/executors/InlineExecutor.h>
 #include <folly/executors/ManualExecutor.h>
 #include <folly/executors/QueuedImmediateExecutor.h>
 #include <folly/futures/Future.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 // TODO(jsedgwick) move this test to executors/test/ once the tested executors
 // have all moved
diff --git a/folly/executors/test/SerialExecutorTest.cpp b/folly/executors/test/SerialExecutorTest.cpp
index ae487591..5e022a1f 100644
--- a/folly/executors/test/SerialExecutorTest.cpp
+++ b/folly/executors/test/SerialExecutorTest.cpp
@@ -16,11 +16,11 @@
 
 #include <chrono>
 
-#include <folly/Baton.h>
 #include <folly/executors/CPUThreadPoolExecutor.h>
 #include <folly/executors/InlineExecutor.h>
 #include <folly/executors/SerialExecutor.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 using namespace std::chrono;
 using folly::SerialExecutor;
diff --git a/folly/experimental/flat_combining/FlatCombining.h b/folly/experimental/flat_combining/FlatCombining.h
index 63fe1ce7..235e24b2 100644
--- a/folly/experimental/flat_combining/FlatCombining.h
+++ b/folly/experimental/flat_combining/FlatCombining.h
@@ -16,11 +16,11 @@
 
 #pragma once
 
-#include <folly/Baton.h>
 #include <folly/Function.h>
 #include <folly/IndexedMemPool.h>
 #include <folly/Portability.h>
 #include <folly/concurrency/CacheLocality.h>
+#include <folly/synchronization/Baton.h>
 
 #include <atomic>
 #include <cassert>
diff --git a/folly/experimental/flat_combining/test/FlatCombiningExamples.h b/folly/experimental/flat_combining/test/FlatCombiningExamples.h
index 4736dfa8..fe6ab184 100644
--- a/folly/experimental/flat_combining/test/FlatCombiningExamples.h
+++ b/folly/experimental/flat_combining/test/FlatCombiningExamples.h
@@ -20,8 +20,8 @@
 #include <memory>
 #include <mutex>
 
-#include <folly/Baton.h>
 #include <folly/experimental/flat_combining/FlatCombining.h>
+#include <folly/synchronization/Baton.h>
 
 namespace folly {
 
diff --git a/folly/experimental/observer/test/ObserverTest.cpp b/folly/experimental/observer/test/ObserverTest.cpp
index ed372f5a..1c3bb28f 100644
--- a/folly/experimental/observer/test/ObserverTest.cpp
+++ b/folly/experimental/observer/test/ObserverTest.cpp
@@ -16,9 +16,9 @@
 
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/experimental/observer/SimpleObservable.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 using namespace folly::observer;
 
diff --git a/folly/experimental/test/FunctionSchedulerTest.cpp b/folly/experimental/test/FunctionSchedulerTest.cpp
index ec943958..b869f2a8 100644
--- a/folly/experimental/test/FunctionSchedulerTest.cpp
+++ b/folly/experimental/test/FunctionSchedulerTest.cpp
@@ -21,10 +21,10 @@
 
 #include <boost/thread.hpp>
 
-#include <folly/Baton.h>
 #include <folly/Random.h>
 #include <folly/experimental/FunctionScheduler.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 #if defined(__linux__)
 #include <dlfcn.h>
diff --git a/folly/experimental/test/ReadMostlySharedPtrTest.cpp b/folly/experimental/test/ReadMostlySharedPtrTest.cpp
index 08996115..789eeecd 100644
--- a/folly/experimental/test/ReadMostlySharedPtrTest.cpp
+++ b/folly/experimental/test/ReadMostlySharedPtrTest.cpp
@@ -20,11 +20,11 @@
 #include <mutex>
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/Memory.h>
 #include <folly/experimental/RCURefCount.h>
 #include <folly/experimental/ReadMostlySharedPtr.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 using folly::ReadMostlyMainPtr;
 using folly::ReadMostlyWeakPtr;
diff --git a/folly/experimental/test/RefCountTest.cpp b/folly/experimental/test/RefCountTest.cpp
index a63de14d..3c75ae3c 100644
--- a/folly/experimental/test/RefCountTest.cpp
+++ b/folly/experimental/test/RefCountTest.cpp
@@ -15,10 +15,10 @@
  */
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/experimental/RCURefCount.h>
 #include <folly/experimental/TLRefCount.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 namespace folly {
 
diff --git a/folly/fibers/GenericBaton.h b/folly/fibers/GenericBaton.h
index 93dbebae..6cab1bf0 100644
--- a/folly/fibers/GenericBaton.h
+++ b/folly/fibers/GenericBaton.h
@@ -15,7 +15,7 @@
  */
 #pragma once
 
-#include <folly/Baton.h>
+#include <folly/synchronization/Baton.h>
 
 #include <folly/fibers/Baton.h>
 
diff --git a/folly/futures/Future-inl.h b/folly/futures/Future-inl.h
index 5818244c..be53ff7c 100644
--- a/folly/futures/Future-inl.h
+++ b/folly/futures/Future-inl.h
@@ -21,11 +21,11 @@
 #include <chrono>
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/Optional.h>
 #include <folly/executors/InlineExecutor.h>
 #include <folly/futures/Timekeeper.h>
 #include <folly/futures/detail/Core.h>
+#include <folly/synchronization/Baton.h>
 
 #ifndef FOLLY_FUTURE_USING_FIBER
 #if FOLLY_MOBILE || defined(__APPLE__)
diff --git a/folly/futures/test/Benchmark.cpp b/folly/futures/test/Benchmark.cpp
index a9c743de..336ce7f7 100644
--- a/folly/futures/test/Benchmark.cpp
+++ b/folly/futures/test/Benchmark.cpp
@@ -15,12 +15,12 @@
  */
 
 #include <folly/Benchmark.h>
-#include <folly/Baton.h>
 #include <folly/executors/InlineExecutor.h>
 #include <folly/futures/Future.h>
 #include <folly/futures/Promise.h>
 #include <folly/portability/GFlags.h>
 #include <folly/portability/Semaphore.h>
+#include <folly/synchronization/Baton.h>
 
 #include <vector>
 
diff --git a/folly/futures/test/FutureTest.cpp b/folly/futures/test/FutureTest.cpp
index e269aa15..ae321fde 100644
--- a/folly/futures/test/FutureTest.cpp
+++ b/folly/futures/test/FutureTest.cpp
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <folly/Baton.h>
+#include <folly/futures/Future.h>
 #include <folly/Executor.h>
 #include <folly/Memory.h>
 #include <folly/Unit.h>
 #include <folly/dynamic.h>
-#include <folly/futures/Future.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 #include <algorithm>
 #include <atomic>
diff --git a/folly/futures/test/InterruptTest.cpp b/folly/futures/test/InterruptTest.cpp
index 7c51bb1e..fc87606b 100644
--- a/folly/futures/test/InterruptTest.cpp
+++ b/folly/futures/test/InterruptTest.cpp
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include <folly/Baton.h>
 #include <folly/futures/Future.h>
 #include <folly/futures/Promise.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 using namespace folly;
 
diff --git a/folly/futures/test/SemiFutureTest.cpp b/folly/futures/test/SemiFutureTest.cpp
index 39ea108b..7b927021 100644
--- a/folly/futures/test/SemiFutureTest.cpp
+++ b/folly/futures/test/SemiFutureTest.cpp
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include <folly/Baton.h>
 #include <folly/Executor.h>
 #include <folly/Memory.h>
 #include <folly/Unit.h>
@@ -22,6 +21,7 @@
 #include <folly/futures/Future.h>
 #include <folly/io/async/EventBase.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 #include <algorithm>
 #include <atomic>
diff --git a/folly/futures/test/ViaTest.cpp b/folly/futures/test/ViaTest.cpp
index fb2008a8..f7a81c99 100644
--- a/folly/futures/test/ViaTest.cpp
+++ b/folly/futures/test/ViaTest.cpp
@@ -16,13 +16,13 @@
 
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/MPMCQueue.h>
 #include <folly/executors/DrivableExecutor.h>
 #include <folly/executors/InlineExecutor.h>
 #include <folly/executors/ManualExecutor.h>
 #include <folly/futures/Future.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 using namespace folly;
 
diff --git a/folly/futures/test/WaitTest.cpp b/folly/futures/test/WaitTest.cpp
index 57e4e172..1d1b287d 100644
--- a/folly/futures/test/WaitTest.cpp
+++ b/folly/futures/test/WaitTest.cpp
@@ -16,10 +16,10 @@
 
 #include <queue>
 
-#include <folly/Baton.h>
 #include <folly/futures/Future.h>
 #include <folly/io/async/EventBase.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 using namespace folly;
 using std::vector;
diff --git a/folly/io/async/EventBase.cpp b/folly/io/async/EventBase.cpp
index c1ed7336..1466d721 100644
--- a/folly/io/async/EventBase.cpp
+++ b/folly/io/async/EventBase.cpp
@@ -26,11 +26,11 @@
 #include <mutex>
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/Memory.h>
 #include <folly/io/async/NotificationQueue.h>
 #include <folly/io/async/VirtualEventBase.h>
 #include <folly/portability/Unistd.h>
+#include <folly/synchronization/Baton.h>
 #include <folly/system/ThreadName.h>
 
 namespace folly {
diff --git a/folly/io/async/ScopedEventBaseThread.h b/folly/io/async/ScopedEventBaseThread.h
index c81ab393..c234dc00 100644
--- a/folly/io/async/ScopedEventBaseThread.h
+++ b/folly/io/async/ScopedEventBaseThread.h
@@ -19,8 +19,8 @@
 #include <memory>
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/io/async/EventBase.h>
+#include <folly/synchronization/Baton.h>
 
 namespace folly {
 
diff --git a/folly/io/async/VirtualEventBase.h b/folly/io/async/VirtualEventBase.h
index 5120f21e..30e8404b 100644
--- a/folly/io/async/VirtualEventBase.h
+++ b/folly/io/async/VirtualEventBase.h
@@ -18,9 +18,9 @@
 
 #include <future>
 
-#include <folly/Baton.h>
 #include <folly/Executor.h>
 #include <folly/io/async/EventBase.h>
+#include <folly/synchronization/Baton.h>
 
 namespace folly {
 
diff --git a/folly/io/async/test/EventBaseThreadTest.cpp b/folly/io/async/test/EventBaseThreadTest.cpp
index e804501e..88ed75b2 100644
--- a/folly/io/async/test/EventBaseThreadTest.cpp
+++ b/folly/io/async/test/EventBaseThreadTest.cpp
@@ -18,9 +18,9 @@
 
 #include <chrono>
 
-#include <folly/Baton.h>
 #include <folly/io/async/EventBaseManager.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 #include <folly/system/ThreadName.h>
 
 using namespace std;
diff --git a/folly/io/async/test/NotificationQueueTest.cpp b/folly/io/async/test/NotificationQueueTest.cpp
index 727eaad2..2ff48d54 100644
--- a/folly/io/async/test/NotificationQueueTest.cpp
+++ b/folly/io/async/test/NotificationQueueTest.cpp
@@ -22,9 +22,9 @@
 #include <list>
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/io/async/ScopedEventBaseThread.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 #ifndef _WIN32
 #include <sys/wait.h>
diff --git a/folly/io/async/test/ScopedEventBaseThreadTest.cpp b/folly/io/async/test/ScopedEventBaseThreadTest.cpp
index 6f1d2266..321a0888 100644
--- a/folly/io/async/test/ScopedEventBaseThreadTest.cpp
+++ b/folly/io/async/test/ScopedEventBaseThreadTest.cpp
@@ -19,10 +19,10 @@
 #include <chrono>
 #include <string>
 
-#include <folly/Baton.h>
 #include <folly/Optional.h>
 #include <folly/io/async/EventBaseManager.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 #include <folly/system/ThreadName.h>
 
 using namespace std;
diff --git a/folly/synchronization/Baton.h b/folly/synchronization/Baton.h
new file mode 100644
index 00000000..83323915
--- /dev/null
+++ b/folly/synchronization/Baton.h
@@ -0,0 +1,373 @@
+/*
+ * Copyright 2017 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+#include <atomic>
+#include <thread>
+
+#include <folly/detail/Futex.h>
+#include <folly/detail/MemoryIdler.h>
+#include <folly/portability/Asm.h>
+
+namespace folly {
+
+/// A Baton allows a thread to block once and be awoken. The single
+/// poster version (with SinglePoster == true) captures a single
+/// handoff, and during its lifecycle (from construction/reset to
+/// destruction/reset) a baton must either be post()ed and wait()ed
+/// exactly once each, or not at all.
+///
+/// The multi-poster version (SinglePoster == false) allows multiple
+/// concurrent handoff attempts, the first of which completes the
+/// handoff and the rest if any are idempotent.
+///
+/// Baton includes no internal padding, and is only 4 bytes in size.
+/// Any alignment or padding to avoid false sharing is up to the user.
+///
+/// This is basically a stripped-down semaphore that supports (only a
+/// single call to sem_post, when SinglePoster == true) and a single
+/// call to sem_wait.
+///
+/// The non-blocking version (Blocking == false) provides more speed
+/// by using only load acquire and store release operations in the
+/// critical path, at the cost of disallowing blocking and timing out.
+///
+/// The current posix semaphore sem_t isn't too bad, but this provides
+/// more a bit more speed, inlining, smaller size, a guarantee that
+/// the implementation won't change, and compatibility with
+/// DeterministicSchedule.  By having a much more restrictive
+/// lifecycle we can also add a bunch of assertions that can help to
+/// catch race conditions ahead of time.
+template <
+    template <typename> class Atom = std::atomic,
+    bool SinglePoster = true, //  single vs multiple posters
+    bool Blocking = true> // blocking vs spinning
+struct Baton {
+  constexpr Baton() : state_(INIT) {}
+
+  Baton(Baton const&) = delete;
+  Baton& operator=(Baton const&) = delete;
+
+  /// It is an error to destroy a Baton on which a thread is currently
+  /// wait()ing.  In practice this means that the waiter usually takes
+  /// responsibility for destroying the Baton.
+  ~Baton() {
+    // The docblock for this function says that it can't be called when
+    // there is a concurrent waiter.  We assume a strong version of this
+    // requirement in which the caller must _know_ that this is true, they
+    // are not allowed to be merely lucky.  If two threads are involved,
+    // the destroying thread must actually have synchronized with the
+    // waiting thread after wait() returned.  To convey causality the the
+    // waiting thread must have used release semantics and the destroying
+    // thread must have used acquire semantics for that communication,
+    // so we are guaranteed to see the post-wait() value of state_,
+    // which cannot be WAITING.
+    //
+    // Note that since we only care about a single memory location,
+    // the only two plausible memory orders here are relaxed and seq_cst.
+    assert(state_.load(std::memory_order_relaxed) != WAITING);
+  }
+
+  /// Equivalent to destroying the Baton and creating a new one.  It is
+  /// a bug to call this while there is a waiting thread, so in practice
+  /// the waiter will be the one that resets the baton.
+  void reset() {
+    // See ~Baton for a discussion about why relaxed is okay here
+    assert(state_.load(std::memory_order_relaxed) != WAITING);
+
+    // We use a similar argument to justify the use of a relaxed store
+    // here.  Since both wait() and post() are required to be called
+    // only once per lifetime, no thread can actually call those methods
+    // correctly after a reset() unless it synchronizes with the thread
+    // that performed the reset().  If a post() or wait() on another thread
+    // didn't synchronize, then regardless of what operation we performed
+    // here there would be a race on proper use of the Baton's spec
+    // (although not on any particular load and store).  Put another way,
+    // we don't need to synchronize here because anybody that might rely
+    // on such synchronization is required by the baton rules to perform
+    // an additional synchronization that has the desired effect anyway.
+    //
+    // There is actually a similar argument to be made about the
+    // constructor, in which the fenceless constructor initialization
+    // of state_ is piggybacked on whatever synchronization mechanism
+    // distributes knowledge of the Baton's existence
+    state_.store(INIT, std::memory_order_relaxed);
+  }
+
+  /// Causes wait() to wake up.  For each lifetime of a Baton (where a
+  /// lifetime starts at construction or reset() and ends at
+  /// destruction or reset()) there can be at most one call to post(),
+  /// in the single poster version.  Any thread may call post().
+  void post() {
+    if (!Blocking) {
+      /// Non-blocking version
+      ///
+      assert([&] {
+        auto state = state_.load(std::memory_order_relaxed);
+        return (state == INIT || state == EARLY_DELIVERY);
+      }());
+      state_.store(EARLY_DELIVERY, std::memory_order_release);
+      return;
+    }
+
+    /// Blocking versions
+    ///
+    if (SinglePoster) {
+      /// Single poster version
+      ///
+      uint32_t before = state_.load(std::memory_order_acquire);
+
+      assert(before == INIT || before == WAITING || before == TIMED_OUT);
+
+      if (before == INIT &&
+          state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
+        return;
+      }
+
+      assert(before == WAITING || before == TIMED_OUT);
+
+      if (before == TIMED_OUT) {
+        return;
+      }
+
+      assert(before == WAITING);
+      state_.store(LATE_DELIVERY, std::memory_order_release);
+      state_.futexWake(1);
+    } else {
+      /// Multi-poster version
+      ///
+      while (true) {
+        uint32_t before = state_.load(std::memory_order_acquire);
+
+        if (before == INIT &&
+            state_.compare_exchange_strong(before, EARLY_DELIVERY)) {
+          return;
+        }
+
+        if (before == TIMED_OUT) {
+          return;
+        }
+
+        if (before == EARLY_DELIVERY || before == LATE_DELIVERY) {
+          // The reason for not simply returning (without the following
+          // atomic operation) is to avoid the following case:
+          //
+          //  T1:             T2:             T3:
+          //  local1.post();  local2.post();  global.wait();
+          //  global.post();  global.post();  local1.try_wait() == true;
+          //                                  local2.try_wait() == false;
+          //
+          if (state_.fetch_add(0) != before) {
+            continue;
+          }
+          return;
+        }
+
+        assert(before == WAITING);
+        if (!state_.compare_exchange_weak(before, LATE_DELIVERY)) {
+          continue;
+        }
+        state_.futexWake(1);
+        return;
+      }
+    }
+  }
+
+  /// Waits until post() has been called in the current Baton lifetime.
+  /// May be called at most once during a Baton lifetime (construction
+  /// |reset until destruction|reset).  If post is called before wait in
+  /// the current lifetime then this method returns immediately.
+  ///
+  /// The restriction that there can be at most one wait() per lifetime
+  /// could be relaxed somewhat without any perf or size regressions,
+  /// but by making this condition very restrictive we can provide better
+  /// checking in debug builds.
+  void wait() {
+    if (spinWaitForEarlyDelivery()) {
+      assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
+      return;
+    }
+
+    if (!Blocking) {
+      while (!try_wait()) {
+        std::this_thread::yield();
+      }
+      return;
+    }
+
+    // guess we have to block :(
+    uint32_t expected = INIT;
+    if (!state_.compare_exchange_strong(expected, WAITING)) {
+      // CAS failed, last minute reprieve
+      assert(expected == EARLY_DELIVERY);
+      return;
+    }
+
+    while (true) {
+      detail::MemoryIdler::futexWait(state_, WAITING);
+
+      // state_ is the truth even if FUTEX_WAIT reported a matching
+      // FUTEX_WAKE, since we aren't using type-stable storage and we
+      // don't guarantee reuse.  The scenario goes like this: thread
+      // A's last touch of a Baton is a call to wake(), which stores
+      // LATE_DELIVERY and gets an unlucky context switch before delivering
+      // the corresponding futexWake.  Thread B sees LATE_DELIVERY
+      // without consuming a futex event, because it calls futexWait
+      // with an expected value of WAITING and hence doesn't go to sleep.
+      // B returns, so the Baton's memory is reused and becomes another
+      // Baton (or a reuse of this one).  B calls futexWait on the new
+      // Baton lifetime, then A wakes up and delivers a spurious futexWake
+      // to the same memory location.  B's futexWait will then report a
+      // consumed wake event even though state_ is still WAITING.
+      //
+      // It would be possible to add an extra state_ dance to communicate
+      // that the futexWake has been sent so that we can be sure to consume
+      // it before returning, but that would be a perf and complexity hit.
+      uint32_t s = state_.load(std::memory_order_acquire);
+      assert(s == WAITING || s == LATE_DELIVERY);
+
+      if (s == LATE_DELIVERY) {
+        return;
+      }
+      // retry
+    }
+  }
+
+  /// Similar to wait, but with a timeout. The thread is unblocked if the
+  /// timeout expires.
+  /// Note: Only a single call to timed_wait/wait is allowed during a baton's
+  /// life-cycle (from construction/reset to destruction/reset). In other
+  /// words, after timed_wait the caller can't invoke wait/timed_wait/try_wait
+  /// again on the same baton without resetting it.
+  ///
+  /// @param  deadline      Time until which the thread can block
+  /// @return               true if the baton was posted to before timeout,
+  ///                       false otherwise
+  template <typename Clock, typename Duration = typename Clock::duration>
+  bool timed_wait(const std::chrono::time_point<Clock,Duration>& deadline) {
+    static_assert(Blocking, "Non-blocking Baton does not support timed wait.");
+
+    if (spinWaitForEarlyDelivery()) {
+      assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
+      return true;
+    }
+
+    // guess we have to block :(
+    uint32_t expected = INIT;
+    if (!state_.compare_exchange_strong(expected, WAITING)) {
+      // CAS failed, last minute reprieve
+      assert(expected == EARLY_DELIVERY);
+      return true;
+    }
+
+    while (true) {
+      auto rv = state_.futexWaitUntil(WAITING, deadline);
+      if (rv == folly::detail::FutexResult::TIMEDOUT) {
+        state_.store(TIMED_OUT, std::memory_order_release);
+        return false;
+      }
+
+      uint32_t s = state_.load(std::memory_order_acquire);
+      assert(s == WAITING || s == LATE_DELIVERY);
+      if (s == LATE_DELIVERY) {
+        return true;
+      }
+    }
+  }
+
+  /// Similar to timed_wait, but with a duration.
+  template <typename Clock = std::chrono::steady_clock, typename Duration>
+  bool timed_wait(const Duration& duration) {
+    auto deadline = Clock::now() + duration;
+    return timed_wait(deadline);
+  }
+
+  /// Similar to wait, but doesn't block the thread if it hasn't been posted.
+  ///
+  /// try_wait has the following semantics:
+  /// - It is ok to call try_wait any number times on the same baton until
+  ///   try_wait reports that the baton has been posted.
+  /// - It is ok to call timed_wait or wait on the same baton if try_wait
+  ///   reports that baton hasn't been posted.
+  /// - If try_wait indicates that the baton has been posted, it is invalid to
+  ///   call wait, try_wait or timed_wait on the same baton without resetting
+  ///
+  /// @return       true if baton has been posted, false othewise
+  bool try_wait() const {
+    auto s = state_.load(std::memory_order_acquire);
+    assert(s == INIT || s == EARLY_DELIVERY);
+    return s == EARLY_DELIVERY;
+  }
+
+ private:
+  enum State : uint32_t {
+    INIT = 0,
+    EARLY_DELIVERY = 1,
+    WAITING = 2,
+    LATE_DELIVERY = 3,
+    TIMED_OUT = 4
+  };
+
+  enum {
+    // Must be positive.  If multiple threads are actively using a
+    // higher-level data structure that uses batons internally, it is
+    // likely that the post() and wait() calls happen almost at the same
+    // time.  In this state, we lose big 50% of the time if the wait goes
+    // to sleep immediately.  On circa-2013 devbox hardware it costs about
+    // 7 usec to FUTEX_WAIT and then be awoken (half the t/iter as the
+    // posix_sem_pingpong test in BatonTests).  We can improve our chances
+    // of EARLY_DELIVERY by spinning for a bit, although we have to balance
+    // this against the loss if we end up sleeping any way.  Spins on this
+    // hw take about 7 nanos (all but 0.5 nanos is the pause instruction).
+    // We give ourself 300 spins, which is about 2 usec of waiting.  As a
+    // partial consolation, since we are using the pause instruction we
+    // are giving a speed boost to the colocated hyperthread.
+    PreBlockAttempts = 300,
+  };
+
+  // Spin for "some time" (see discussion on PreBlockAttempts) waiting
+  // for a post.
+  //
+  // @return       true if we received an early delivery during the wait,
+  //               false otherwise. If the function returns true then
+  //               state_ is guaranteed to be EARLY_DELIVERY
+  bool spinWaitForEarlyDelivery() {
+
+    static_assert(PreBlockAttempts > 0,
+        "isn't this assert clearer than an uninitialized variable warning?");
+    for (int i = 0; i < PreBlockAttempts; ++i) {
+      if (try_wait()) {
+        // hooray!
+        return true;
+      }
+      // The pause instruction is the polite way to spin, but it doesn't
+      // actually affect correctness to omit it if we don't have it.
+      // Pausing donates the full capabilities of the current core to
+      // its other hyperthreads for a dozen cycles or so
+      asm_volatile_pause();
+    }
+
+    return false;
+  }
+
+  detail::Futex<Atom> state_;
+};
+
+} // namespace folly
diff --git a/folly/synchronization/LifoSem.h b/folly/synchronization/LifoSem.h
index 90176094..ef27df4e 100644
--- a/folly/synchronization/LifoSem.h
+++ b/folly/synchronization/LifoSem.h
@@ -24,10 +24,10 @@
 #include <system_error>
 
 #include <folly/AtomicStruct.h>
-#include <folly/Baton.h>
 #include <folly/CachelinePadded.h>
 #include <folly/IndexedMemPool.h>
 #include <folly/Likely.h>
+#include <folly/synchronization/Baton.h>
 
 namespace folly {
 
diff --git a/folly/synchronization/test/BatonBenchmark.cpp b/folly/synchronization/test/BatonBenchmark.cpp
new file mode 100644
index 00000000..25126282
--- /dev/null
+++ b/folly/synchronization/test/BatonBenchmark.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2017 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <folly/synchronization/Baton.h>
+
+#include <thread>
+
+#include <folly/Benchmark.h>
+#include <folly/portability/GFlags.h>
+#include <folly/portability/GTest.h>
+#include <folly/portability/Semaphore.h>
+#include <folly/synchronization/test/BatonTestHelpers.h>
+#include <folly/test/DeterministicSchedule.h>
+
+using namespace folly;
+using namespace folly::test;
+using folly::detail::EmulatedFutexAtomic;
+
+typedef DeterministicSchedule DSched;
+
+BENCHMARK(baton_pingpong_single_poster_blocking, iters) {
+  run_pingpong_test<std::atomic, true, true>(iters);
+}
+
+BENCHMARK(baton_pingpong_multi_poster_blocking, iters) {
+  run_pingpong_test<std::atomic, false, true>(iters);
+}
+
+BENCHMARK(baton_pingpong_single_poster_nonblocking, iters) {
+  run_pingpong_test<std::atomic, true, false>(iters);
+}
+
+BENCHMARK(baton_pingpong_multi_poster_nonblocking, iters) {
+  run_pingpong_test<std::atomic, false, false>(iters);
+}
+
+BENCHMARK_DRAW_LINE()
+
+BENCHMARK(baton_pingpong_emulated_futex_single_poster_blocking, iters) {
+  run_pingpong_test<EmulatedFutexAtomic, true, true>(iters);
+}
+
+BENCHMARK(baton_pingpong_emulated_futex_multi_poster_blocking, iters) {
+  run_pingpong_test<EmulatedFutexAtomic, false, true>(iters);
+}
+
+BENCHMARK(baton_pingpong_emulated_futex_single_poster_nonblocking, iters) {
+  run_pingpong_test<EmulatedFutexAtomic, true, false>(iters);
+}
+
+BENCHMARK(baton_pingpong_emulated_futex_multi_poster_nonblocking, iters) {
+  run_pingpong_test<EmulatedFutexAtomic, false, false>(iters);
+}
+
+BENCHMARK_DRAW_LINE()
+
+BENCHMARK(posix_sem_pingpong, iters) {
+  sem_t sems[3];
+  sem_t* a = sems + 0;
+  sem_t* b = sems + 2; // to get it on a different cache line
+
+  sem_init(a, 0, 0);
+  sem_init(b, 0, 0);
+  auto thr = std::thread([=] {
+    for (size_t i = 0; i < iters; ++i) {
+      sem_wait(a);
+      sem_post(b);
+    }
+  });
+  for (size_t i = 0; i < iters; ++i) {
+    sem_post(a);
+    sem_wait(b);
+  }
+  thr.join();
+}
+
+// I am omitting a benchmark result snapshot because these microbenchmarks
+// mainly illustrate that PreBlockAttempts is very effective for rapid
+// handoffs.  The performance of Baton and sem_t is essentially identical
+// to the required futex calls for the blocking case
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  auto rv = RUN_ALL_TESTS();
+  if (!rv && FLAGS_benchmark) {
+    folly::runBenchmarks();
+  }
+  return rv;
+}
diff --git a/folly/synchronization/test/BatonTest.cpp b/folly/synchronization/test/BatonTest.cpp
new file mode 100644
index 00000000..2e6f5164
--- /dev/null
+++ b/folly/synchronization/test/BatonTest.cpp
@@ -0,0 +1,285 @@
+/*
+ * Copyright 2017 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <folly/synchronization/Baton.h>
+
+#include <thread>
+
+#include <folly/portability/GTest.h>
+#include <folly/synchronization/test/BatonTestHelpers.h>
+#include <folly/test/DeterministicSchedule.h>
+
+using namespace folly;
+using namespace folly::test;
+using folly::detail::EmulatedFutexAtomic;
+
+/// Basic test
+
+TEST(Baton, basic_single_poster_blocking) {
+  run_basic_test<std::atomic, true, true>();
+  run_basic_test<EmulatedFutexAtomic, true, true>();
+  run_basic_test<DeterministicAtomic, true, true>();
+}
+
+TEST(Baton, basic_single_poster_nonblocking) {
+  run_basic_test<std::atomic, true, false>();
+  run_basic_test<EmulatedFutexAtomic, true, false>();
+  run_basic_test<DeterministicAtomic, true, false>();
+}
+
+TEST(Baton, basic_multi_poster_blocking) {
+  run_basic_test<std::atomic, false, true>();
+}
+
+TEST(Baton, basic_multi_poster_nonblocking) {
+  run_basic_test<std::atomic, false, false>();
+}
+
+/// Ping pong tests
+
+TEST(Baton, pingpong_single_poster_blocking) {
+  DSched sched(DSched::uniform(0));
+
+  run_pingpong_test<DeterministicAtomic, true, true>(1000);
+}
+
+TEST(Baton, pingpong_single_poster_nonblocking) {
+  DSched sched(DSched::uniform(0));
+
+  run_pingpong_test<DeterministicAtomic, true, false>(1000);
+}
+
+TEST(Baton, pingpong_multi_poster_blocking) {
+  DSched sched(DSched::uniform(0));
+
+  run_pingpong_test<DeterministicAtomic, false, true>(1000);
+}
+
+TEST(Baton, pingpong_multi_poster_nonblocking) {
+  DSched sched(DSched::uniform(0));
+
+  run_pingpong_test<DeterministicAtomic, false, false>(1000);
+}
+
+/// Timed wait tests - Nonblocking Baton does not support timed_wait()
+
+// Timed wait basic system clock tests
+
+TEST(Baton, timed_wait_basic_system_clock_single_poster) {
+  run_basic_timed_wait_tests<std::atomic, std::chrono::system_clock, true>();
+  run_basic_timed_wait_tests<
+      EmulatedFutexAtomic,
+      std::chrono::system_clock,
+      true>();
+  run_basic_timed_wait_tests<
+      DeterministicAtomic,
+      std::chrono::system_clock,
+      true>();
+}
+
+TEST(Baton, timed_wait_basic_system_clock_multi_poster) {
+  run_basic_timed_wait_tests<std::atomic, std::chrono::system_clock, false>();
+  run_basic_timed_wait_tests<
+      EmulatedFutexAtomic,
+      std::chrono::system_clock,
+      false>();
+  run_basic_timed_wait_tests<
+      DeterministicAtomic,
+      std::chrono::system_clock,
+      false>();
+}
+
+// Timed wait timeout system clock tests
+
+TEST(Baton, timed_wait_timeout_system_clock_single_poster) {
+  run_timed_wait_tmo_tests<std::atomic, std::chrono::system_clock, true>();
+  run_timed_wait_tmo_tests<
+      EmulatedFutexAtomic,
+      std::chrono::system_clock,
+      true>();
+  run_timed_wait_tmo_tests<
+      DeterministicAtomic,
+      std::chrono::system_clock,
+      true>();
+}
+
+TEST(Baton, timed_wait_timeout_system_clock_multi_poster) {
+  run_timed_wait_tmo_tests<std::atomic, std::chrono::system_clock, false>();
+  run_timed_wait_tmo_tests<
+      EmulatedFutexAtomic,
+      std::chrono::system_clock,
+      false>();
+  run_timed_wait_tmo_tests<
+      DeterministicAtomic,
+      std::chrono::system_clock,
+      false>();
+}
+
+// Timed wait regular system clock tests
+
+TEST(Baton, timed_wait_system_clock_single_poster) {
+  run_timed_wait_regular_test<std::atomic, std::chrono::system_clock, true>();
+  run_timed_wait_regular_test<
+      EmulatedFutexAtomic,
+      std::chrono::system_clock,
+      true>();
+  run_timed_wait_regular_test<
+      DeterministicAtomic,
+      std::chrono::system_clock,
+      true>();
+}
+
+TEST(Baton, timed_wait_system_clock_multi_poster) {
+  run_timed_wait_regular_test<std::atomic, std::chrono::system_clock, false>();
+  run_timed_wait_regular_test<
+      EmulatedFutexAtomic,
+      std::chrono::system_clock,
+      false>();
+  run_timed_wait_regular_test<
+      DeterministicAtomic,
+      std::chrono::system_clock,
+      false>();
+}
+
+// Timed wait basic steady clock tests
+
+TEST(Baton, timed_wait_basic_steady_clock_single_poster) {
+  run_basic_timed_wait_tests<std::atomic, std::chrono::steady_clock, true>();
+  run_basic_timed_wait_tests<
+      EmulatedFutexAtomic,
+      std::chrono::steady_clock,
+      true>();
+  run_basic_timed_wait_tests<
+      DeterministicAtomic,
+      std::chrono::steady_clock,
+      true>();
+}
+
+TEST(Baton, timed_wait_basic_steady_clock_multi_poster) {
+  run_basic_timed_wait_tests<std::atomic, std::chrono::steady_clock, false>();
+  run_basic_timed_wait_tests<
+      EmulatedFutexAtomic,
+      std::chrono::steady_clock,
+      false>();
+  run_basic_timed_wait_tests<
+      DeterministicAtomic,
+      std::chrono::steady_clock,
+      false>();
+}
+
+// Timed wait timeout steady clock tests
+
+TEST(Baton, timed_wait_timeout_steady_clock_single_poster) {
+  run_timed_wait_tmo_tests<std::atomic, std::chrono::steady_clock, true>();
+  run_timed_wait_tmo_tests<
+      EmulatedFutexAtomic,
+      std::chrono::steady_clock,
+      true>();
+  run_timed_wait_tmo_tests<
+      DeterministicAtomic,
+      std::chrono::steady_clock,
+      true>();
+}
+
+TEST(Baton, timed_wait_timeout_steady_clock_multi_poster) {
+  run_timed_wait_tmo_tests<std::atomic, std::chrono::steady_clock, false>();
+  run_timed_wait_tmo_tests<
+      EmulatedFutexAtomic,
+      std::chrono::steady_clock,
+      false>();
+  run_timed_wait_tmo_tests<
+      DeterministicAtomic,
+      std::chrono::steady_clock,
+      false>();
+}
+
+// Timed wait regular steady clock tests
+
+TEST(Baton, timed_wait_steady_clock_single_poster) {
+  run_timed_wait_regular_test<std::atomic, std::chrono::steady_clock, true>();
+  run_timed_wait_regular_test<
+      EmulatedFutexAtomic,
+      std::chrono::steady_clock,
+      true>();
+  run_timed_wait_regular_test<
+      DeterministicAtomic,
+      std::chrono::steady_clock,
+      true>();
+}
+
+TEST(Baton, timed_wait_steady_clock_multi_poster) {
+  run_timed_wait_regular_test<std::atomic, std::chrono::steady_clock, false>();
+  run_timed_wait_regular_test<
+      EmulatedFutexAtomic,
+      std::chrono::steady_clock,
+      false>();
+  run_timed_wait_regular_test<
+      DeterministicAtomic,
+      std::chrono::steady_clock,
+      false>();
+}
+
+/// Try wait tests
+
+TEST(Baton, try_wait_single_poster_blocking) {
+  run_try_wait_tests<std::atomic, true, true>();
+  run_try_wait_tests<EmulatedFutexAtomic, true, true>();
+  run_try_wait_tests<DeterministicAtomic, true, true>();
+}
+
+TEST(Baton, try_wait_single_poster_nonblocking) {
+  run_try_wait_tests<std::atomic, true, false>();
+  run_try_wait_tests<EmulatedFutexAtomic, true, false>();
+  run_try_wait_tests<DeterministicAtomic, true, false>();
+}
+
+TEST(Baton, try_wait_multi_poster_blocking) {
+  run_try_wait_tests<std::atomic, false, true>();
+  run_try_wait_tests<EmulatedFutexAtomic, false, true>();
+  run_try_wait_tests<DeterministicAtomic, false, true>();
+}
+
+TEST(Baton, try_wait_multi_poster_nonblocking) {
+  run_try_wait_tests<std::atomic, false, false>();
+  run_try_wait_tests<EmulatedFutexAtomic, false, false>();
+  run_try_wait_tests<DeterministicAtomic, false, false>();
+}
+
+/// Multi-producer tests
+
+TEST(Baton, multi_producer_single_poster_blocking) {
+  run_try_wait_tests<std::atomic, true, true>();
+  run_try_wait_tests<EmulatedFutexAtomic, true, true>();
+  run_try_wait_tests<DeterministicAtomic, true, true>();
+}
+
+TEST(Baton, multi_producer_single_poster_nonblocking) {
+  run_try_wait_tests<std::atomic, true, false>();
+  run_try_wait_tests<EmulatedFutexAtomic, true, false>();
+  run_try_wait_tests<DeterministicAtomic, true, false>();
+}
+
+TEST(Baton, multi_producer_multi_poster_blocking) {
+  run_try_wait_tests<std::atomic, false, true>();
+  run_try_wait_tests<EmulatedFutexAtomic, false, true>();
+  run_try_wait_tests<DeterministicAtomic, false, true>();
+}
+
+TEST(Baton, multi_producer_multi_poster_nonblocking) {
+  run_try_wait_tests<std::atomic, false, false>();
+  run_try_wait_tests<EmulatedFutexAtomic, false, false>();
+  run_try_wait_tests<DeterministicAtomic, false, false>();
+}
diff --git a/folly/synchronization/test/BatonTestHelpers.h b/folly/synchronization/test/BatonTestHelpers.h
new file mode 100644
index 00000000..6e242e2a
--- /dev/null
+++ b/folly/synchronization/test/BatonTestHelpers.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2017 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
+#include <folly/test/DeterministicSchedule.h>
+
+namespace folly {
+namespace test {
+
+typedef DeterministicSchedule DSched;
+
+template <template <typename> class Atom, bool SinglePoster, bool Blocking>
+void run_basic_test() {
+  Baton<Atom, SinglePoster, Blocking> b;
+  b.post();
+  b.wait();
+}
+
+template <template <typename> class Atom, bool SinglePoster, bool Blocking>
+void run_pingpong_test(int numRounds) {
+  using B = Baton<Atom, SinglePoster, Blocking>;
+  B batons[17];
+  B& a = batons[0];
+  B& b = batons[16]; // to get it on a different cache line
+  auto thr = DSched::thread([&] {
+    for (int i = 0; i < numRounds; ++i) {
+      a.wait();
+      a.reset();
+      b.post();
+    }
+  });
+  for (int i = 0; i < numRounds; ++i) {
+    a.post();
+    b.wait();
+    b.reset();
+  }
+  DSched::join(thr);
+}
+
+template <template <typename> class Atom, typename Clock, bool SinglePoster>
+void run_basic_timed_wait_tests() {
+  Baton<Atom, SinglePoster> b;
+  b.post();
+  // tests if early delivery works fine
+  EXPECT_TRUE(b.timed_wait(Clock::now()));
+}
+
+template <template <typename> class Atom, typename Clock, bool SinglePoster>
+void run_timed_wait_tmo_tests() {
+  Baton<Atom, SinglePoster> b;
+
+  auto thr = DSched::thread([&] {
+    bool rv = b.timed_wait(Clock::now() + std::chrono::milliseconds(1));
+    // main thread is guaranteed to not post until timeout occurs
+    EXPECT_FALSE(rv);
+  });
+  DSched::join(thr);
+}
+
+template <template <typename> class Atom, typename Clock, bool SinglePoster>
+void run_timed_wait_regular_test() {
+  Baton<Atom, SinglePoster> b;
+
+  auto thr = DSched::thread([&] {
+    // To wait forever we'd like to use time_point<Clock>::max, but
+    // std::condition_variable does math to convert the timeout to
+    // system_clock without handling overflow.
+    auto farFuture = Clock::now() + std::chrono::hours(1000);
+    bool rv = b.timed_wait(farFuture);
+    if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
+      // DeterministicAtomic ignores actual times, so doesn't guarantee
+      // a lack of timeout
+      EXPECT_TRUE(rv);
+    }
+  });
+
+  if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
+    // If we are using std::atomic (or EmulatedFutexAtomic) then
+    // a sleep here guarantees to a large extent that 'thr' will
+    // execute wait before we post it, thus testing late delivery. For
+    // DeterministicAtomic, we just rely on DeterministicSchedule to do
+    // the scheduling.  The test won't fail if we lose the race, we just
+    // don't get coverage.
+    std::this_thread::sleep_for(std::chrono::milliseconds(2));
+  }
+
+  b.post();
+  DSched::join(thr);
+}
+
+template <template <typename> class Atom, bool SinglePoster, bool Blocking>
+void run_try_wait_tests() {
+  Baton<Atom, SinglePoster, Blocking> b;
+  EXPECT_FALSE(b.try_wait());
+  b.post();
+  EXPECT_TRUE(b.try_wait());
+}
+
+template <template <typename> class Atom, bool SinglePoster, bool Blocking>
+void run_multi_producer_tests() {
+  constexpr int NPROD = 5;
+  Baton<Atom, SinglePoster, Blocking> local_ping[NPROD];
+  Baton<Atom, SinglePoster, Blocking> local_pong[NPROD];
+  Baton<Atom, /* SingleProducer = */ false, Blocking> global;
+  Baton<Atom, SinglePoster, Blocking> shutdown;
+
+  std::thread prod[NPROD];
+  for (int i = 0; i < NPROD; ++i) {
+    prod[i] = DSched::thread([&, i] {
+      if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
+        // If we are using std::atomic (or EmulatedFutexAtomic) then
+        // a variable sleep here will make it more likely that
+        // global.post()-s will span more than one global.wait() by
+        // the consumer thread and for the latter to block (if the
+        // global baton is blocking). For DeterministicAtomic, we just
+        // rely on DeterministicSchedule to do the scheduling.  The
+        // test won't fail if we lose the race, we just don't get
+        // coverage.
+        for (int j = 0; j < i; ++j) {
+          std::this_thread::sleep_for(std::chrono::microseconds(1));
+        }
+      }
+      local_ping[i].post();
+      global.post();
+      local_pong[i].wait();
+    });
+  }
+
+  auto cons = DSched::thread([&] {
+    while (true) {
+      global.wait();
+      global.reset();
+      if (shutdown.try_wait()) {
+        return;
+      }
+      for (int i = 0; i < NPROD; ++i) {
+        if (local_ping.try_wait()) {
+          local_ping.reset();
+          local_pong.post();
+        }
+      }
+    }
+  });
+
+  for (auto& t : prod) {
+    DSched::join(t);
+  }
+
+  global.post();
+  shutdown.post();
+  DSched::join(cons);
+}
+
+} // namespace test
+} // namespace folly
diff --git a/folly/system/test/ThreadNameTest.cpp b/folly/system/test/ThreadNameTest.cpp
index 9dce6bd9..c4150247 100644
--- a/folly/system/test/ThreadNameTest.cpp
+++ b/folly/system/test/ThreadNameTest.cpp
@@ -16,9 +16,9 @@
 
 #include <thread>
 
-#include <folly/Baton.h>
 #include <folly/ScopeGuard.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 #include <folly/system/ThreadName.h>
 
 using namespace std;
diff --git a/folly/test/BatonBenchmark.cpp b/folly/test/BatonBenchmark.cpp
deleted file mode 100644
index 82e3b5e3..00000000
--- a/folly/test/BatonBenchmark.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright 2017 Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <folly/Baton.h>
-
-#include <thread>
-
-#include <folly/Benchmark.h>
-#include <folly/portability/GFlags.h>
-#include <folly/portability/GTest.h>
-#include <folly/portability/Semaphore.h>
-#include <folly/test/BatonTestHelpers.h>
-#include <folly/test/DeterministicSchedule.h>
-
-using namespace folly;
-using namespace folly::test;
-using folly::detail::EmulatedFutexAtomic;
-
-typedef DeterministicSchedule DSched;
-
-BENCHMARK(baton_pingpong_single_poster_blocking, iters) {
-  run_pingpong_test<std::atomic, true, true>(iters);
-}
-
-BENCHMARK(baton_pingpong_multi_poster_blocking, iters) {
-  run_pingpong_test<std::atomic, false, true>(iters);
-}
-
-BENCHMARK(baton_pingpong_single_poster_nonblocking, iters) {
-  run_pingpong_test<std::atomic, true, false>(iters);
-}
-
-BENCHMARK(baton_pingpong_multi_poster_nonblocking, iters) {
-  run_pingpong_test<std::atomic, false, false>(iters);
-}
-
-BENCHMARK_DRAW_LINE()
-
-BENCHMARK(baton_pingpong_emulated_futex_single_poster_blocking, iters) {
-  run_pingpong_test<EmulatedFutexAtomic, true, true>(iters);
-}
-
-BENCHMARK(baton_pingpong_emulated_futex_multi_poster_blocking, iters) {
-  run_pingpong_test<EmulatedFutexAtomic, false, true>(iters);
-}
-
-BENCHMARK(baton_pingpong_emulated_futex_single_poster_nonblocking, iters) {
-  run_pingpong_test<EmulatedFutexAtomic, true, false>(iters);
-}
-
-BENCHMARK(baton_pingpong_emulated_futex_multi_poster_nonblocking, iters) {
-  run_pingpong_test<EmulatedFutexAtomic, false, false>(iters);
-}
-
-BENCHMARK_DRAW_LINE()
-
-BENCHMARK(posix_sem_pingpong, iters) {
-  sem_t sems[3];
-  sem_t* a = sems + 0;
-  sem_t* b = sems + 2; // to get it on a different cache line
-
-  sem_init(a, 0, 0);
-  sem_init(b, 0, 0);
-  auto thr = std::thread([=] {
-    for (size_t i = 0; i < iters; ++i) {
-      sem_wait(a);
-      sem_post(b);
-    }
-  });
-  for (size_t i = 0; i < iters; ++i) {
-    sem_post(a);
-    sem_wait(b);
-  }
-  thr.join();
-}
-
-// I am omitting a benchmark result snapshot because these microbenchmarks
-// mainly illustrate that PreBlockAttempts is very effective for rapid
-// handoffs.  The performance of Baton and sem_t is essentially identical
-// to the required futex calls for the blocking case
-
-int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
-  gflags::ParseCommandLineFlags(&argc, &argv, true);
-
-  auto rv = RUN_ALL_TESTS();
-  if (!rv && FLAGS_benchmark) {
-    folly::runBenchmarks();
-  }
-  return rv;
-}
diff --git a/folly/test/BatonTest.cpp b/folly/test/BatonTest.cpp
deleted file mode 100644
index ded7fb50..00000000
--- a/folly/test/BatonTest.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright 2017 Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <folly/Baton.h>
-
-#include <thread>
-
-#include <folly/portability/GTest.h>
-#include <folly/test/BatonTestHelpers.h>
-#include <folly/test/DeterministicSchedule.h>
-
-using namespace folly;
-using namespace folly::test;
-using folly::detail::EmulatedFutexAtomic;
-
-/// Basic test
-
-TEST(Baton, basic_single_poster_blocking) {
-  run_basic_test<std::atomic, true, true>();
-  run_basic_test<EmulatedFutexAtomic, true, true>();
-  run_basic_test<DeterministicAtomic, true, true>();
-}
-
-TEST(Baton, basic_single_poster_nonblocking) {
-  run_basic_test<std::atomic, true, false>();
-  run_basic_test<EmulatedFutexAtomic, true, false>();
-  run_basic_test<DeterministicAtomic, true, false>();
-}
-
-TEST(Baton, basic_multi_poster_blocking) {
-  run_basic_test<std::atomic, false, true>();
-}
-
-TEST(Baton, basic_multi_poster_nonblocking) {
-  run_basic_test<std::atomic, false, false>();
-}
-
-/// Ping pong tests
-
-TEST(Baton, pingpong_single_poster_blocking) {
-  DSched sched(DSched::uniform(0));
-
-  run_pingpong_test<DeterministicAtomic, true, true>(1000);
-}
-
-TEST(Baton, pingpong_single_poster_nonblocking) {
-  DSched sched(DSched::uniform(0));
-
-  run_pingpong_test<DeterministicAtomic, true, false>(1000);
-}
-
-TEST(Baton, pingpong_multi_poster_blocking) {
-  DSched sched(DSched::uniform(0));
-
-  run_pingpong_test<DeterministicAtomic, false, true>(1000);
-}
-
-TEST(Baton, pingpong_multi_poster_nonblocking) {
-  DSched sched(DSched::uniform(0));
-
-  run_pingpong_test<DeterministicAtomic, false, false>(1000);
-}
-
-/// Timed wait tests - Nonblocking Baton does not support timed_wait()
-
-// Timed wait basic system clock tests
-
-TEST(Baton, timed_wait_basic_system_clock_single_poster) {
-  run_basic_timed_wait_tests<std::atomic, std::chrono::system_clock, true>();
-  run_basic_timed_wait_tests<
-      EmulatedFutexAtomic,
-      std::chrono::system_clock,
-      true>();
-  run_basic_timed_wait_tests<
-      DeterministicAtomic,
-      std::chrono::system_clock,
-      true>();
-}
-
-TEST(Baton, timed_wait_basic_system_clock_multi_poster) {
-  run_basic_timed_wait_tests<std::atomic, std::chrono::system_clock, false>();
-  run_basic_timed_wait_tests<
-      EmulatedFutexAtomic,
-      std::chrono::system_clock,
-      false>();
-  run_basic_timed_wait_tests<
-      DeterministicAtomic,
-      std::chrono::system_clock,
-      false>();
-}
-
-// Timed wait timeout system clock tests
-
-TEST(Baton, timed_wait_timeout_system_clock_single_poster) {
-  run_timed_wait_tmo_tests<std::atomic, std::chrono::system_clock, true>();
-  run_timed_wait_tmo_tests<
-      EmulatedFutexAtomic,
-      std::chrono::system_clock,
-      true>();
-  run_timed_wait_tmo_tests<
-      DeterministicAtomic,
-      std::chrono::system_clock,
-      true>();
-}
-
-TEST(Baton, timed_wait_timeout_system_clock_multi_poster) {
-  run_timed_wait_tmo_tests<std::atomic, std::chrono::system_clock, false>();
-  run_timed_wait_tmo_tests<
-      EmulatedFutexAtomic,
-      std::chrono::system_clock,
-      false>();
-  run_timed_wait_tmo_tests<
-      DeterministicAtomic,
-      std::chrono::system_clock,
-      false>();
-}
-
-// Timed wait regular system clock tests
-
-TEST(Baton, timed_wait_system_clock_single_poster) {
-  run_timed_wait_regular_test<std::atomic, std::chrono::system_clock, true>();
-  run_timed_wait_regular_test<
-      EmulatedFutexAtomic,
-      std::chrono::system_clock,
-      true>();
-  run_timed_wait_regular_test<
-      DeterministicAtomic,
-      std::chrono::system_clock,
-      true>();
-}
-
-TEST(Baton, timed_wait_system_clock_multi_poster) {
-  run_timed_wait_regular_test<std::atomic, std::chrono::system_clock, false>();
-  run_timed_wait_regular_test<
-      EmulatedFutexAtomic,
-      std::chrono::system_clock,
-      false>();
-  run_timed_wait_regular_test<
-      DeterministicAtomic,
-      std::chrono::system_clock,
-      false>();
-}
-
-// Timed wait basic steady clock tests
-
-TEST(Baton, timed_wait_basic_steady_clock_single_poster) {
-  run_basic_timed_wait_tests<std::atomic, std::chrono::steady_clock, true>();
-  run_basic_timed_wait_tests<
-      EmulatedFutexAtomic,
-      std::chrono::steady_clock,
-      true>();
-  run_basic_timed_wait_tests<
-      DeterministicAtomic,
-      std::chrono::steady_clock,
-      true>();
-}
-
-TEST(Baton, timed_wait_basic_steady_clock_multi_poster) {
-  run_basic_timed_wait_tests<std::atomic, std::chrono::steady_clock, false>();
-  run_basic_timed_wait_tests<
-      EmulatedFutexAtomic,
-      std::chrono::steady_clock,
-      false>();
-  run_basic_timed_wait_tests<
-      DeterministicAtomic,
-      std::chrono::steady_clock,
-      false>();
-}
-
-// Timed wait timeout steady clock tests
-
-TEST(Baton, timed_wait_timeout_steady_clock_single_poster) {
-  run_timed_wait_tmo_tests<std::atomic, std::chrono::steady_clock, true>();
-  run_timed_wait_tmo_tests<
-      EmulatedFutexAtomic,
-      std::chrono::steady_clock,
-      true>();
-  run_timed_wait_tmo_tests<
-      DeterministicAtomic,
-      std::chrono::steady_clock,
-      true>();
-}
-
-TEST(Baton, timed_wait_timeout_steady_clock_multi_poster) {
-  run_timed_wait_tmo_tests<std::atomic, std::chrono::steady_clock, false>();
-  run_timed_wait_tmo_tests<
-      EmulatedFutexAtomic,
-      std::chrono::steady_clock,
-      false>();
-  run_timed_wait_tmo_tests<
-      DeterministicAtomic,
-      std::chrono::steady_clock,
-      false>();
-}
-
-// Timed wait regular steady clock tests
-
-TEST(Baton, timed_wait_steady_clock_single_poster) {
-  run_timed_wait_regular_test<std::atomic, std::chrono::steady_clock, true>();
-  run_timed_wait_regular_test<
-      EmulatedFutexAtomic,
-      std::chrono::steady_clock,
-      true>();
-  run_timed_wait_regular_test<
-      DeterministicAtomic,
-      std::chrono::steady_clock,
-      true>();
-}
-
-TEST(Baton, timed_wait_steady_clock_multi_poster) {
-  run_timed_wait_regular_test<std::atomic, std::chrono::steady_clock, false>();
-  run_timed_wait_regular_test<
-      EmulatedFutexAtomic,
-      std::chrono::steady_clock,
-      false>();
-  run_timed_wait_regular_test<
-      DeterministicAtomic,
-      std::chrono::steady_clock,
-      false>();
-}
-
-/// Try wait tests
-
-TEST(Baton, try_wait_single_poster_blocking) {
-  run_try_wait_tests<std::atomic, true, true>();
-  run_try_wait_tests<EmulatedFutexAtomic, true, true>();
-  run_try_wait_tests<DeterministicAtomic, true, true>();
-}
-
-TEST(Baton, try_wait_single_poster_nonblocking) {
-  run_try_wait_tests<std::atomic, true, false>();
-  run_try_wait_tests<EmulatedFutexAtomic, true, false>();
-  run_try_wait_tests<DeterministicAtomic, true, false>();
-}
-
-TEST(Baton, try_wait_multi_poster_blocking) {
-  run_try_wait_tests<std::atomic, false, true>();
-  run_try_wait_tests<EmulatedFutexAtomic, false, true>();
-  run_try_wait_tests<DeterministicAtomic, false, true>();
-}
-
-TEST(Baton, try_wait_multi_poster_nonblocking) {
-  run_try_wait_tests<std::atomic, false, false>();
-  run_try_wait_tests<EmulatedFutexAtomic, false, false>();
-  run_try_wait_tests<DeterministicAtomic, false, false>();
-}
-
-/// Multi-producer tests
-
-TEST(Baton, multi_producer_single_poster_blocking) {
-  run_try_wait_tests<std::atomic, true, true>();
-  run_try_wait_tests<EmulatedFutexAtomic, true, true>();
-  run_try_wait_tests<DeterministicAtomic, true, true>();
-}
-
-TEST(Baton, multi_producer_single_poster_nonblocking) {
-  run_try_wait_tests<std::atomic, true, false>();
-  run_try_wait_tests<EmulatedFutexAtomic, true, false>();
-  run_try_wait_tests<DeterministicAtomic, true, false>();
-}
-
-TEST(Baton, multi_producer_multi_poster_blocking) {
-  run_try_wait_tests<std::atomic, false, true>();
-  run_try_wait_tests<EmulatedFutexAtomic, false, true>();
-  run_try_wait_tests<DeterministicAtomic, false, true>();
-}
-
-TEST(Baton, multi_producer_multi_poster_nonblocking) {
-  run_try_wait_tests<std::atomic, false, false>();
-  run_try_wait_tests<EmulatedFutexAtomic, false, false>();
-  run_try_wait_tests<DeterministicAtomic, false, false>();
-}
diff --git a/folly/test/BatonTestHelpers.h b/folly/test/BatonTestHelpers.h
deleted file mode 100644
index 3a48a215..00000000
--- a/folly/test/BatonTestHelpers.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright 2017 Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <folly/Baton.h>
-#include <folly/portability/GTest.h>
-#include <folly/test/DeterministicSchedule.h>
-
-namespace folly {
-namespace test {
-
-typedef DeterministicSchedule DSched;
-
-template <template <typename> class Atom, bool SinglePoster, bool Blocking>
-void run_basic_test() {
-  Baton<Atom, SinglePoster, Blocking> b;
-  b.post();
-  b.wait();
-}
-
-template <template <typename> class Atom, bool SinglePoster, bool Blocking>
-void run_pingpong_test(int numRounds) {
-  using B = Baton<Atom, SinglePoster, Blocking>;
-  B batons[17];
-  B& a = batons[0];
-  B& b = batons[16]; // to get it on a different cache line
-  auto thr = DSched::thread([&] {
-    for (int i = 0; i < numRounds; ++i) {
-      a.wait();
-      a.reset();
-      b.post();
-    }
-  });
-  for (int i = 0; i < numRounds; ++i) {
-    a.post();
-    b.wait();
-    b.reset();
-  }
-  DSched::join(thr);
-}
-
-template <template <typename> class Atom, typename Clock, bool SinglePoster>
-void run_basic_timed_wait_tests() {
-  Baton<Atom, SinglePoster> b;
-  b.post();
-  // tests if early delivery works fine
-  EXPECT_TRUE(b.timed_wait(Clock::now()));
-}
-
-template <template <typename> class Atom, typename Clock, bool SinglePoster>
-void run_timed_wait_tmo_tests() {
-  Baton<Atom, SinglePoster> b;
-
-  auto thr = DSched::thread([&] {
-    bool rv = b.timed_wait(Clock::now() + std::chrono::milliseconds(1));
-    // main thread is guaranteed to not post until timeout occurs
-    EXPECT_FALSE(rv);
-  });
-  DSched::join(thr);
-}
-
-template <template <typename> class Atom, typename Clock, bool SinglePoster>
-void run_timed_wait_regular_test() {
-  Baton<Atom, SinglePoster> b;
-
-  auto thr = DSched::thread([&] {
-    // To wait forever we'd like to use time_point<Clock>::max, but
-    // std::condition_variable does math to convert the timeout to
-    // system_clock without handling overflow.
-    auto farFuture = Clock::now() + std::chrono::hours(1000);
-    bool rv = b.timed_wait(farFuture);
-    if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
-      // DeterministicAtomic ignores actual times, so doesn't guarantee
-      // a lack of timeout
-      EXPECT_TRUE(rv);
-    }
-  });
-
-  if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
-    // If we are using std::atomic (or EmulatedFutexAtomic) then
-    // a sleep here guarantees to a large extent that 'thr' will
-    // execute wait before we post it, thus testing late delivery. For
-    // DeterministicAtomic, we just rely on DeterministicSchedule to do
-    // the scheduling.  The test won't fail if we lose the race, we just
-    // don't get coverage.
-    std::this_thread::sleep_for(std::chrono::milliseconds(2));
-  }
-
-  b.post();
-  DSched::join(thr);
-}
-
-template <template <typename> class Atom, bool SinglePoster, bool Blocking>
-void run_try_wait_tests() {
-  Baton<Atom, SinglePoster, Blocking> b;
-  EXPECT_FALSE(b.try_wait());
-  b.post();
-  EXPECT_TRUE(b.try_wait());
-}
-
-template <template <typename> class Atom, bool SinglePoster, bool Blocking>
-void run_multi_producer_tests() {
-  constexpr int NPROD = 5;
-  Baton<Atom, SinglePoster, Blocking> local_ping[NPROD];
-  Baton<Atom, SinglePoster, Blocking> local_pong[NPROD];
-  Baton<Atom, /* SingleProducer = */ false, Blocking> global;
-  Baton<Atom, SinglePoster, Blocking> shutdown;
-
-  std::thread prod[NPROD];
-  for (int i = 0; i < NPROD; ++i) {
-    prod[i] = DSched::thread([&, i] {
-      if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
-        // If we are using std::atomic (or EmulatedFutexAtomic) then
-        // a variable sleep here will make it more likely that
-        // global.post()-s will span more than one global.wait() by
-        // the consumer thread and for the latter to block (if the
-        // global baton is blocking). For DeterministicAtomic, we just
-        // rely on DeterministicSchedule to do the scheduling.  The
-        // test won't fail if we lose the race, we just don't get
-        // coverage.
-        for (int j = 0; j < i; ++j) {
-          std::this_thread::sleep_for(std::chrono::microseconds(1));
-        }
-      }
-      local_ping[i].post();
-      global.post();
-      local_pong[i].wait();
-    });
-  }
-
-  auto cons = DSched::thread([&] {
-    while (true) {
-      global.wait();
-      global.reset();
-      if (shutdown.try_wait()) {
-        return;
-      }
-      for (int i = 0; i < NPROD; ++i) {
-        if (local_ping.try_wait()) {
-          local_ping.reset();
-          local_pong.post();
-        }
-      }
-    }
-  });
-
-  for (auto& t : prod) {
-    DSched::join(t);
-  }
-
-  global.post();
-  shutdown.post();
-  DSched::join(cons);
-}
-
-} // namespace test
-} // namespace folly
diff --git a/folly/test/MemoryIdlerTest.cpp b/folly/test/MemoryIdlerTest.cpp
index 72546f41..d5393115 100644
--- a/folly/test/MemoryIdlerTest.cpp
+++ b/folly/test/MemoryIdlerTest.cpp
@@ -16,9 +16,9 @@
 
 #include <folly/detail/MemoryIdler.h>
 
-#include <folly/Baton.h>
 #include <folly/portability/GMock.h>
 #include <folly/portability/GTest.h>
+#include <folly/synchronization/Baton.h>
 
 #include <memory>
 #include <thread>
diff --git a/folly/test/ThreadLocalTest.cpp b/folly/test/ThreadLocalTest.cpp
index 6791c0f4..e724b999 100644
--- a/folly/test/ThreadLocalTest.cpp
+++ b/folly/test/ThreadLocalTest.cpp
@@ -37,11 +37,11 @@
 
 #include <glog/logging.h>
 
-#include <folly/Baton.h>
 #include <folly/Memory.h>
 #include <folly/experimental/io/FsUtil.h>
 #include <folly/portability/GTest.h>
 #include <folly/portability/Unistd.h>
+#include <folly/synchronization/Baton.h>
 #include <folly/system/ThreadId.h>
 
 using namespace folly;