From 05ce52289b0ec8f525a92d6d1955301d0b77c0a7 Mon Sep 17 00:00:00 2001
From: Giuseppe Ottaviano <ott@fb.com>
Date: Wed, 28 Jun 2017 11:09:42 -0700
Subject: [PATCH] Move CacheLocality out of detail/ and into concurrency/

Summary: There's no reason these utilities should only be used by folly.

Reviewed By: mzlee

Differential Revision: D5317894

fbshipit-source-id: 5a9bdf4c5efaa5bcbe78e6723a03a468f2fe5e32
---
 CMakeLists.txt                                |  3 ++-
 folly/IndexedMemPool.h                        |  8 +++++---
 folly/LifoSem.h                               |  6 ++----
 folly/MPMCQueue.h                             | 18 ++++++++---------
 folly/Makefile.am                             |  4 ++--
 folly/ProducerConsumerQueue.h                 | 14 ++++++-------
 folly/SharedMutex.h                           |  7 ++++---
 folly/TokenBucket.h                           |  2 +-
 .../{detail => concurrency}/CacheLocality.cpp |  8 +++-----
 folly/{detail => concurrency}/CacheLocality.h |  7 +++----
 folly/concurrency/CoreCachedSharedPtr.h       |  8 ++++----
 .../test/CacheLocalityBenchmark.cpp           | 15 +++++++-------
 .../test/CacheLocalityTest.cpp                | 20 ++++++++-----------
 folly/detail/CachelinePaddedImpl.h            |  6 +++---
 folly/detail/MemoryIdler.cpp                  |  2 +-
 .../flat_combining/FlatCombining.h            |  2 +-
 folly/test/CachelinePaddedTest.cpp            |  2 +-
 folly/test/DeterministicSchedule.cpp          |  4 ++--
 folly/test/DeterministicSchedule.h            |  5 +++--
 19 files changed, 67 insertions(+), 74 deletions(-)
 rename folly/{detail => concurrency}/CacheLocality.cpp (97%)
 rename folly/{detail => concurrency}/CacheLocality.h (99%)
 rename folly/{ => concurrency}/test/CacheLocalityBenchmark.cpp (96%)
 rename folly/{ => concurrency}/test/CacheLocalityTest.cpp (97%)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4bfa9974..7d476c5c 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -292,6 +292,8 @@ if (BUILD_TESTS)
   apply_folly_compile_options_to_target(folly_test_support)
 
   folly_define_tests(
+    DIRECTORY concurrency/test/
+      TEST cache_locality_test SOURCES CacheLocalityTest.cpp
     DIRECTORY experimental/test/
       TEST autotimer_test SOURCES AutoTimerTest.cpp
       TEST bits_test_2 SOURCES BitsTest.cpp
@@ -467,7 +469,6 @@ if (BUILD_TESTS)
       TEST baton_test SOURCES BatonTest.cpp
       TEST bit_iterator_test SOURCES BitIteratorTest.cpp
       TEST bits_test SOURCES BitsTest.cpp
-      TEST cache_locality_test SOURCES CacheLocalityTest.cpp
       TEST cacheline_padded_test SOURCES CachelinePaddedTest.cpp
       TEST call_once_test SOURCES CallOnceTest.cpp
       TEST checksum_test SOURCES ChecksumTest.cpp
diff --git a/folly/IndexedMemPool.h b/folly/IndexedMemPool.h
index 275d441c..6b3fa53a 100644
--- a/folly/IndexedMemPool.h
+++ b/folly/IndexedMemPool.h
@@ -16,14 +16,16 @@
 
 #pragma once
 
-#include <type_traits>
 #include <assert.h>
 #include <errno.h>
 #include <stdint.h>
+
+#include <type_traits>
+
 #include <boost/noncopyable.hpp>
 #include <folly/AtomicStruct.h>
 #include <folly/Portability.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 #include <folly/portability/SysMman.h>
 #include <folly/portability/Unistd.h>
 
@@ -497,7 +499,7 @@ struct IndexedMemPool : boost::noncopyable {
   }
 
   AtomicStruct<TaggedPtr,Atom>& localHead() {
-    auto stripe = detail::AccessSpreader<Atom>::current(NumLocalLists);
+    auto stripe = AccessSpreader<Atom>::current(NumLocalLists);
     return local_[stripe].head;
   }
 
diff --git a/folly/LifoSem.h b/folly/LifoSem.h
index f6b7bf02..9c0404fe 100644
--- a/folly/LifoSem.h
+++ b/folly/LifoSem.h
@@ -27,7 +27,7 @@
 #include <folly/Baton.h>
 #include <folly/IndexedMemPool.h>
 #include <folly/Likely.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 namespace folly {
 
@@ -515,9 +515,7 @@ struct LifoSemBase {
   FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
   folly::AtomicStruct<LifoSemHead,Atom> head_;
 
-  char padding_[folly::detail::CacheLocality::kFalseSharingRange -
-      sizeof(LifoSemHead)];
-
+  char padding_[folly::CacheLocality::kFalseSharingRange - sizeof(LifoSemHead)];
 
   static LifoSemNode<Handoff, Atom>& idxToNode(uint32_t idx) {
     auto raw = &LifoSemRawNode<Atom>::pool()[idx];
diff --git a/folly/MPMCQueue.h b/folly/MPMCQueue.h
index b0cfc46f..0e921060 100644
--- a/folly/MPMCQueue.h
+++ b/folly/MPMCQueue.h
@@ -25,7 +25,7 @@
 #include <type_traits>
 
 #include <folly/Traits.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 #include <folly/detail/TurnSequencer.h>
 #include <folly/portability/Unistd.h>
 
@@ -647,11 +647,11 @@ class MPMCQueueBase<Derived<T, Atom, Dynamic>> : boost::noncopyable {
     }
 
     // ideally this would be a static assert, but g++ doesn't allow it
-    assert(alignof(MPMCQueue<T,Atom>)
-           >= detail::CacheLocality::kFalseSharingRange);
-    assert(static_cast<uint8_t*>(static_cast<void*>(&popTicket_))
-           - static_cast<uint8_t*>(static_cast<void*>(&pushTicket_))
-           >= detail::CacheLocality::kFalseSharingRange);
+    assert(alignof(MPMCQueue<T, Atom>) >= CacheLocality::kFalseSharingRange);
+    assert(
+        static_cast<uint8_t*>(static_cast<void*>(&popTicket_)) -
+            static_cast<uint8_t*>(static_cast<void*>(&pushTicket_)) >=
+        CacheLocality::kFalseSharingRange);
   }
 
   /// A default-constructed queue is useful because a usable (non-zero
@@ -971,8 +971,7 @@ class MPMCQueueBase<Derived<T, Atom, Dynamic>> : boost::noncopyable {
     /// To avoid false sharing in slots_ with neighboring memory
     /// allocations, we pad it with this many SingleElementQueue-s at
     /// each end
-    kSlotPadding = (detail::CacheLocality::kFalseSharingRange - 1)
-        / sizeof(Slot) + 1
+    kSlotPadding = (CacheLocality::kFalseSharingRange - 1) / sizeof(Slot) + 1
   };
 
   /// The maximum number of items in the queue at once
@@ -1024,8 +1023,7 @@ class MPMCQueueBase<Derived<T, Atom, Dynamic>> : boost::noncopyable {
 
   /// Alignment doesn't prevent false sharing at the end of the struct,
   /// so fill out the last cache line
-  char padding_[detail::CacheLocality::kFalseSharingRange -
-                sizeof(Atom<uint32_t>)];
+  char padding_[CacheLocality::kFalseSharingRange - sizeof(Atom<uint32_t>)];
 
   /// We assign tickets in increasing order, but we don't want to
   /// access neighboring elements of slots_ because that will lead to
diff --git a/folly/Makefile.am b/folly/Makefile.am
index 74977332..acc1c0bd 100644
--- a/folly/Makefile.am
+++ b/folly/Makefile.am
@@ -56,12 +56,12 @@ nobase_follyinclude_HEADERS = \
 	CppAttributes.h \
 	CpuId.h \
 	CPortability.h \
+	concurrency/CacheLocality.h \
 	concurrency/CoreCachedSharedPtr.h \
 	detail/AtomicHashUtils.h \
 	detail/AtomicUnorderedMapUtils.h \
 	detail/AtomicUtils.h \
 	detail/BitIteratorDetail.h \
-	detail/CacheLocality.h \
 	detail/CachelinePaddedImpl.h \
 	detail/ChecksumDetail.h \
 	detail/DiscriminatedPtrDetail.h \
@@ -459,7 +459,7 @@ libfolly_la_SOURCES = \
 	Assume.cpp \
 	Checksum.cpp \
 	ClockGettimeWrappers.cpp \
-	detail/CacheLocality.cpp \
+	concurrency/CacheLocality.cpp \
 	detail/IPAddress.cpp \
 	dynamic.cpp \
 	ExceptionWrapper.cpp \
diff --git a/folly/ProducerConsumerQueue.h b/folly/ProducerConsumerQueue.h
index d0bf3ec8..12f2bf42 100644
--- a/folly/ProducerConsumerQueue.h
+++ b/folly/ProducerConsumerQueue.h
@@ -27,7 +27,7 @@
 #include <type_traits>
 #include <utility>
 
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 namespace folly {
 
@@ -168,14 +168,14 @@ struct ProducerConsumerQueue {
   }
 
 private:
-  char pad0_[detail::CacheLocality::kFalseSharingRange];
-  const uint32_t size_;
-  T* const records_;
+ char pad0_[CacheLocality::kFalseSharingRange];
+ const uint32_t size_;
+ T* const records_;
 
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> readIndex_;
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> writeIndex_;
+ FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> readIndex_;
+ FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> writeIndex_;
 
-  char pad1_[detail::CacheLocality::kFalseSharingRange - sizeof(writeIndex_)];
+ char pad1_[CacheLocality::kFalseSharingRange - sizeof(writeIndex_)];
 };
 
 }
diff --git a/folly/SharedMutex.h b/folly/SharedMutex.h
index c13a6d6f..24d8051c 100644
--- a/folly/SharedMutex.h
+++ b/folly/SharedMutex.h
@@ -19,11 +19,13 @@
 #pragma once
 
 #include <stdint.h>
+
 #include <atomic>
 #include <thread>
 #include <type_traits>
+
 #include <folly/Likely.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 #include <folly/detail/Futex.h>
 #include <folly/portability/Asm.h>
 #include <folly/portability/SysResource.h>
@@ -1417,8 +1419,7 @@ bool SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
         // starting point for our empty-slot search, can change after
         // calling waitForZeroBits
         uint32_t bestSlot =
-            (uint32_t)folly::detail::AccessSpreader<Atom>::current(
-                kMaxDeferredReaders);
+            (uint32_t)folly::AccessSpreader<Atom>::current(kMaxDeferredReaders);
 
         // deferred readers are already enabled, or it is time to
         // enable them if we can find a slot
diff --git a/folly/TokenBucket.h b/folly/TokenBucket.h
index d88bcd86..905b0f9f 100644
--- a/folly/TokenBucket.h
+++ b/folly/TokenBucket.h
@@ -21,7 +21,7 @@
 #include <chrono>
 
 #include <folly/Likely.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 namespace folly {
 
diff --git a/folly/detail/CacheLocality.cpp b/folly/concurrency/CacheLocality.cpp
similarity index 97%
rename from folly/detail/CacheLocality.cpp
rename to folly/concurrency/CacheLocality.cpp
index d646ebe9..36b77b83 100644
--- a/folly/detail/CacheLocality.cpp
+++ b/folly/concurrency/CacheLocality.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 #ifndef _MSC_VER
 #define _GNU_SOURCE 1 // for RTLD_NOLOAD
@@ -29,7 +29,6 @@
 #include <folly/ScopeGuard.h>
 
 namespace folly {
-namespace detail {
 
 ///////////// CacheLocality
 
@@ -244,13 +243,13 @@ SimpleAllocator::SimpleAllocator(size_t allocSize, size_t sz)
 SimpleAllocator::~SimpleAllocator() {
   std::lock_guard<std::mutex> g(m_);
   for (auto& block : blocks_) {
-    aligned_free(block);
+    detail::aligned_free(block);
   }
 }
 
 void* SimpleAllocator::allocateHard() {
   // Allocate a new slab.
-  mem_ = static_cast<uint8_t*>(aligned_malloc(allocSize_, allocSize_));
+  mem_ = static_cast<uint8_t*>(detail::aligned_malloc(allocSize_, allocSize_));
   if (!mem_) {
     std::__throw_bad_alloc();
   }
@@ -271,5 +270,4 @@ void* SimpleAllocator::allocateHard() {
   return mem;
 }
 
-} // namespace detail
 } // namespace folly
diff --git a/folly/detail/CacheLocality.h b/folly/concurrency/CacheLocality.h
similarity index 99%
rename from folly/detail/CacheLocality.h
rename to folly/concurrency/CacheLocality.h
index 741d30f8..be9d4410 100644
--- a/folly/detail/CacheLocality.h
+++ b/folly/concurrency/CacheLocality.h
@@ -38,7 +38,6 @@
 #include <folly/portability/Memory.h>
 
 namespace folly {
-namespace detail {
 
 // This file contains several classes that might be useful if you are
 // trying to dynamically optimize cache locality: CacheLocality reads
@@ -458,7 +457,8 @@ class CoreAllocator {
         // Align to a cacheline
         size = size + (CacheLocality::kFalseSharingRange - 1);
         size &= ~size_t(CacheLocality::kFalseSharingRange - 1);
-        void* mem = aligned_malloc(size, CacheLocality::kFalseSharingRange);
+        void* mem =
+            detail::aligned_malloc(size, CacheLocality::kFalseSharingRange);
         if (!mem) {
           std::__throw_bad_alloc();
         }
@@ -478,7 +478,7 @@ class CoreAllocator {
         auto allocator = *static_cast<SimpleAllocator**>(addr);
         allocator->deallocate(mem);
       } else {
-        aligned_free(mem);
+        detail::aligned_free(mem);
       }
     }
   };
@@ -507,5 +507,4 @@ StlAllocator<typename CoreAllocator<Stripes>::Allocator, T> getCoreAllocatorStl(
   return StlAllocator<typename CoreAllocator<Stripes>::Allocator, T>(alloc);
 }
 
-} // namespace detail
 } // namespace folly
diff --git a/folly/concurrency/CoreCachedSharedPtr.h b/folly/concurrency/CoreCachedSharedPtr.h
index 594050b2..ac89ac18 100644
--- a/folly/concurrency/CoreCachedSharedPtr.h
+++ b/folly/concurrency/CoreCachedSharedPtr.h
@@ -20,7 +20,7 @@
 #include <memory>
 
 #include <folly/Enumerate.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 namespace folly {
 
@@ -46,14 +46,14 @@ class CoreCachedSharedPtr {
     // prevent false sharing. Their control blocks will be adjacent
     // thanks to allocate_shared().
     for (auto slot : folly::enumerate(slots_)) {
-      auto alloc = detail::getCoreAllocatorStl<Holder, kNumSlots>(slot.index);
+      auto alloc = getCoreAllocatorStl<Holder, kNumSlots>(slot.index);
       auto holder = std::allocate_shared<Holder>(alloc, p);
       *slot = std::shared_ptr<T>(holder, p.get());
     }
   }
 
   std::shared_ptr<T> get() const {
-    return slots_[detail::AccessSpreader<>::current(kNumSlots)];
+    return slots_[AccessSpreader<>::current(kNumSlots)];
   }
 
  private:
@@ -75,7 +75,7 @@ class CoreCachedWeakPtr {
   }
 
   std::weak_ptr<T> get() const {
-    return slots_[detail::AccessSpreader<>::current(kNumSlots)];
+    return slots_[AccessSpreader<>::current(kNumSlots)];
   }
 
  private:
diff --git a/folly/test/CacheLocalityBenchmark.cpp b/folly/concurrency/test/CacheLocalityBenchmark.cpp
similarity index 96%
rename from folly/test/CacheLocalityBenchmark.cpp
rename to folly/concurrency/test/CacheLocalityBenchmark.cpp
index dbb2d6ac..4c6c9ea1 100644
--- a/folly/test/CacheLocalityBenchmark.cpp
+++ b/folly/concurrency/test/CacheLocalityBenchmark.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 #include <memory>
 #include <thread>
@@ -24,7 +24,7 @@
 
 #include <folly/Benchmark.h>
 
-using namespace folly::detail;
+using namespace folly;
 
 #define DECLARE_SPREADER_TAG(tag, locality, func)      \
   namespace {                                          \
@@ -32,7 +32,6 @@ using namespace folly::detail;
   struct tag {};                                       \
   }                                                    \
   namespace folly {                                    \
-  namespace detail {                                   \
   template <>                                          \
   const CacheLocality& CacheLocality::system<tag>() {  \
     static auto* inst = new CacheLocality(locality);   \
@@ -42,16 +41,16 @@ using namespace folly::detail;
   Getcpu::Func AccessSpreader<tag>::pickGetcpuFunc() { \
     return func;                                       \
   }                                                    \
-  }                                                    \
   }
 
 DECLARE_SPREADER_TAG(
     ThreadLocalTag,
     CacheLocality::system<>(),
-    folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu)
-DECLARE_SPREADER_TAG(PthreadSelfTag,
-                     CacheLocality::system<>(),
-                     folly::detail::FallbackGetcpu<HashingThreadId>::getcpu)
+    folly::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu)
+DECLARE_SPREADER_TAG(
+    PthreadSelfTag,
+    CacheLocality::system<>(),
+    folly::FallbackGetcpu<HashingThreadId>::getcpu)
 
 BENCHMARK(AccessSpreaderUse, iters) {
   for (unsigned long i = 0; i < iters; ++i) {
diff --git a/folly/test/CacheLocalityTest.cpp b/folly/concurrency/test/CacheLocalityTest.cpp
similarity index 97%
rename from folly/test/CacheLocalityTest.cpp
rename to folly/concurrency/test/CacheLocalityTest.cpp
index cb18f14c..92a5abc6 100644
--- a/folly/test/CacheLocalityTest.cpp
+++ b/folly/concurrency/test/CacheLocalityTest.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 #include <folly/portability/GTest.h>
 
@@ -24,7 +24,7 @@
 #include <unordered_map>
 #include <glog/logging.h>
 
-using namespace folly::detail;
+using namespace folly;
 
 /// This is the relevant nodes from a production box's sysfs tree.  If you
 /// think this map is ugly you should see the version of this test that
@@ -363,13 +363,12 @@ TEST(Getcpu, VdsoGetcpu) {
 #ifdef FOLLY_TLS
 TEST(ThreadId, SimpleTls) {
   unsigned cpu = 0;
-  auto rv =
-      folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
-          &cpu, nullptr, nullptr);
+  auto rv = folly::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
+      &cpu, nullptr, nullptr);
   EXPECT_EQ(rv, 0);
   EXPECT_TRUE(cpu > 0);
   unsigned again;
-  folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
+  folly::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
       &again, nullptr, nullptr);
   EXPECT_EQ(cpu, again);
 }
@@ -377,13 +376,12 @@ TEST(ThreadId, SimpleTls) {
 
 TEST(ThreadId, SimplePthread) {
   unsigned cpu = 0;
-  auto rv = folly::detail::FallbackGetcpu<HashingThreadId>::getcpu(
-      &cpu, nullptr, nullptr);
+  auto rv =
+      folly::FallbackGetcpu<HashingThreadId>::getcpu(&cpu, nullptr, nullptr);
   EXPECT_EQ(rv, 0);
   EXPECT_TRUE(cpu > 0);
   unsigned again;
-  folly::detail::FallbackGetcpu<HashingThreadId>::getcpu(
-      &again, nullptr, nullptr);
+  folly::FallbackGetcpu<HashingThreadId>::getcpu(&again, nullptr, nullptr);
   EXPECT_EQ(cpu, again);
 }
 
@@ -414,7 +412,6 @@ TEST(AccessSpreader, Simple) {
   struct tag {};                                       \
   }                                                    \
   namespace folly {                                    \
-  namespace detail {                                   \
   template <>                                          \
   const CacheLocality& CacheLocality::system<tag>() {  \
     static auto* inst = new CacheLocality(locality);   \
@@ -424,7 +421,6 @@ TEST(AccessSpreader, Simple) {
   Getcpu::Func AccessSpreader<tag>::pickGetcpuFunc() { \
     return func;                                       \
   }                                                    \
-  }                                                    \
   }
 
 DECLARE_SPREADER_TAG(ManualTag, CacheLocality::uniform(16), testingGetcpu)
diff --git a/folly/detail/CachelinePaddedImpl.h b/folly/detail/CachelinePaddedImpl.h
index e65a6ce6..1acce99d 100644
--- a/folly/detail/CachelinePaddedImpl.h
+++ b/folly/detail/CachelinePaddedImpl.h
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 namespace folly {
 
@@ -33,7 +33,7 @@ struct CachelinePaddedImpl;
 // We need alignas(T) alignas(kFalseSharingRange) for the case where alignof(T)
 // > alignof(kFalseSharingRange).
 template <typename T>
-struct alignas(T) alignas(detail::CacheLocality::kFalseSharingRange)
+struct alignas(T) alignas(CacheLocality::kFalseSharingRange)
     CachelinePaddedImpl<T, /* needsPadding = */ false> {
   template <typename... Args>
   explicit CachelinePaddedImpl(Args&&... args)
@@ -42,7 +42,7 @@ struct alignas(T) alignas(detail::CacheLocality::kFalseSharingRange)
 };
 
 template <typename T>
-struct alignas(T) alignas(detail::CacheLocality::kFalseSharingRange)
+struct alignas(T) alignas(CacheLocality::kFalseSharingRange)
     CachelinePaddedImpl<T, /* needsPadding = */ true> {
   template <typename... Args>
   explicit CachelinePaddedImpl(Args&&... args)
diff --git a/folly/detail/MemoryIdler.cpp b/folly/detail/MemoryIdler.cpp
index 608e244b..70c4c055 100644
--- a/folly/detail/MemoryIdler.cpp
+++ b/folly/detail/MemoryIdler.cpp
@@ -21,7 +21,7 @@
 #include <folly/Malloc.h>
 #include <folly/Portability.h>
 #include <folly/ScopeGuard.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 #include <folly/portability/PThread.h>
 #include <folly/portability/SysMman.h>
 #include <folly/portability/Unistd.h>
diff --git a/folly/experimental/flat_combining/FlatCombining.h b/folly/experimental/flat_combining/FlatCombining.h
index 146f1748..1ddc0c08 100644
--- a/folly/experimental/flat_combining/FlatCombining.h
+++ b/folly/experimental/flat_combining/FlatCombining.h
@@ -20,7 +20,7 @@
 #include <folly/Function.h>
 #include <folly/IndexedMemPool.h>
 #include <folly/Portability.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
 
 #include <atomic>
 #include <cassert>
diff --git a/folly/test/CachelinePaddedTest.cpp b/folly/test/CachelinePaddedTest.cpp
index 02fea430..2fe69e88 100644
--- a/folly/test/CachelinePaddedTest.cpp
+++ b/folly/test/CachelinePaddedTest.cpp
@@ -26,7 +26,7 @@ static_assert(
     std::is_standard_layout<CachelinePadded<int>>::value,
     "CachelinePadded<T> must be standard-layout if T is.");
 
-const int kCachelineSize = folly::detail::CacheLocality::kFalseSharingRange;
+const int kCachelineSize = folly::CacheLocality::kFalseSharingRange;
 
 template <int dataSize>
 struct SizedData {
diff --git a/folly/test/DeterministicSchedule.cpp b/folly/test/DeterministicSchedule.cpp
index 87ecb762..1272c42a 100644
--- a/folly/test/DeterministicSchedule.cpp
+++ b/folly/test/DeterministicSchedule.cpp
@@ -382,6 +382,7 @@ int Futex<DeterministicAtomic>::futexWake(int count, uint32_t wakeMask) {
   DeterministicSchedule::afterSharedAccess();
   return rv;
 }
+}
 
 template <>
 CacheLocality const& CacheLocality::system<test::DeterministicAtomic>() {
@@ -391,7 +392,6 @@ CacheLocality const& CacheLocality::system<test::DeterministicAtomic>() {
 
 template <>
 Getcpu::Func AccessSpreader<test::DeterministicAtomic>::pickGetcpuFunc() {
-  return &DeterministicSchedule::getcpu;
-}
+  return &detail::DeterministicSchedule::getcpu;
 }
 }
diff --git a/folly/test/DeterministicSchedule.h b/folly/test/DeterministicSchedule.h
index 5ec444e6..8f1f4283 100644
--- a/folly/test/DeterministicSchedule.h
+++ b/folly/test/DeterministicSchedule.h
@@ -28,8 +28,8 @@
 #include <vector>
 
 #include <folly/ScopeGuard.h>
+#include <folly/concurrency/CacheLocality.h>
 #include <folly/detail/AtomicUtils.h>
-#include <folly/detail/CacheLocality.h>
 #include <folly/detail/Futex.h>
 #include <folly/portability/Semaphore.h>
 
@@ -499,8 +499,9 @@ FutexResult Futex<test::DeterministicAtomic>::futexWaitImpl(
     std::chrono::time_point<std::chrono::system_clock>* absSystemTime,
     std::chrono::time_point<std::chrono::steady_clock>* absSteadyTime,
     uint32_t waitMask);
+}
 
 template <>
 Getcpu::Func AccessSpreader<test::DeterministicAtomic>::pickGetcpuFunc();
-}
+
 } // namespace folly::detail
-- 
2.34.1