From b1eb5b684447a4a58516d5e8ffbd328aab1e477d Mon Sep 17 00:00:00 2001 From: Giuseppe Ottaviano Date: Tue, 23 May 2017 22:52:12 -0700 Subject: [PATCH] A core-cached shared_ptr Summary: This class serves a similar use case to `ReadMostlySharedPtr`, but with two differences: - It returns actual `shared_ptr`s, so it can be used as a drop-in replacement. - Its memory utilization is constant, independent of the number of threads that use it. Also, the code is much simpler. On the other hand, it is about 4x slower (but it still scales linearly in the number of concurrent threads acquiring the reference). Reviewed By: djwatson Differential Revision: D5104730 fbshipit-source-id: 8c18b635e0390394b06417b6df8b790e4bd2d90d --- folly/Enumerate.h | 2 +- folly/Makefile.am | 1 + folly/concurrency/CoreCachedSharedPtr.h | 87 ++++++++++ .../test/CoreCachedSharedPtrTest.cpp | 149 ++++++++++++++++++ .../test/ReadMostlySharedPtrBenchmark.cpp | 4 +- 5 files changed, 241 insertions(+), 2 deletions(-) create mode 100644 folly/concurrency/CoreCachedSharedPtr.h create mode 100644 folly/concurrency/test/CoreCachedSharedPtrTest.cpp diff --git a/folly/Enumerate.h b/folly/Enumerate.h index 9efecfd2..94bcecef 100644 --- a/folly/Enumerate.h +++ b/folly/Enumerate.h @@ -21,7 +21,7 @@ #include -/* +/** * Similar to Python's enumerate(), folly::enumerate() can be used to * iterate a range with a for-range loop, and it also allows to * retrieve the count of iterations so far. diff --git a/folly/Makefile.am b/folly/Makefile.am index f03ceda9..3993b45f 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -58,6 +58,7 @@ nobase_follyinclude_HEADERS = \ CppAttributes.h \ CpuId.h \ CPortability.h \ + concurrency/CoreCachedSharedPtr.h \ detail/AtomicHashUtils.h \ detail/AtomicUnorderedMapUtils.h \ detail/AtomicUtils.h \ diff --git a/folly/concurrency/CoreCachedSharedPtr.h b/folly/concurrency/CoreCachedSharedPtr.h new file mode 100644 index 00000000..7f5b9a95 --- /dev/null +++ b/folly/concurrency/CoreCachedSharedPtr.h @@ -0,0 +1,87 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +namespace folly { + +/** + * This class creates core-local caches for a given shared_ptr, to + * mitigate contention when acquiring/releasing it. + * + * It has the same thread-safety guarantees as shared_ptr: it is safe + * to concurrently call get(), but reset()s must be synchronized with + * reads and other resets(). + * + * @author Giuseppe Ottaviano + */ +template +class CoreCachedSharedPtr { + public: + explicit CoreCachedSharedPtr(const std::shared_ptr& p = nullptr) { + reset(p); + } + + void reset(const std::shared_ptr& p = nullptr) { + for (auto& slot : slots_) { + auto holder = std::make_shared(p); + slot = std::shared_ptr(holder, p.get()); + } + } + + std::shared_ptr get() const { + return slots_[detail::AccessSpreader<>::current(kNumSlots)]; + } + + private: + template + friend class CoreCachedWeakPtr; + + // Space the Holders by a cache line, so their control blocks (which + // are adjacent to the slots thanks to make_shared()) will also be + // spaced. + struct FOLLY_ALIGN_TO_AVOID_FALSE_SHARING Holder { + explicit Holder(std::shared_ptr p) : ptr(std::move(p)) {} + std::shared_ptr ptr; + }; + + std::array, kNumSlots> slots_; +}; + +template +class CoreCachedWeakPtr { + public: + explicit CoreCachedWeakPtr(const CoreCachedSharedPtr& p) { + for (auto slot : folly::enumerate(slots_)) { + *slot = p.slots_[slot.index]; + } + } + + std::weak_ptr get() const { + return slots_[detail::AccessSpreader<>::current(kNumSlots)]; + } + + private: + std::array, kNumSlots> slots_; +}; + +} // namespace diff --git a/folly/concurrency/test/CoreCachedSharedPtrTest.cpp b/folly/concurrency/test/CoreCachedSharedPtrTest.cpp new file mode 100644 index 00000000..909a63b2 --- /dev/null +++ b/folly/concurrency/test/CoreCachedSharedPtrTest.cpp @@ -0,0 +1,149 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +TEST(CoreCachedSharedPtr, Basic) { + auto p = std::make_shared(1); + std::weak_ptr wp(p); + + folly::CoreCachedSharedPtr cached(p); + folly::CoreCachedWeakPtr wcached(cached); + + std::shared_ptr p2 = cached.get(); + std::weak_ptr wp2 = wcached.get(); + ASSERT_TRUE(p2 != nullptr); + ASSERT_EQ(*p2, 1); + ASSERT_FALSE(wp2.expired()); + + p.reset(); + cached.reset(); + // p2 should survive. + ASSERT_FALSE(wp.expired()); + // Here we don't know anything about wp2: could be expired even if + // there is a living reference to the main object. + + p2.reset(); + ASSERT_TRUE(wp.expired()); + ASSERT_TRUE(wp2.expired()); +} + +namespace { + +template +void parallelRun(Operation op, size_t numThreads, size_t iters) { + std::vector threads; + + // Prevent the compiler from hoisting code out of the loop. + auto opNoinline = [&]() FOLLY_NOINLINE { op(); }; + + for (size_t t = 0; t < numThreads; ++t) { + threads.emplace_back([&] { + for (size_t i = 0; i < iters; ++i) { + opNoinline(); + } + }); + } + + for (auto& t : threads) + t.join(); +} + +void benchmarkSharedPtrCopy(size_t numThreads, size_t iters) { + auto p = std::make_shared(1); + parallelRun([&] { return p; }, numThreads, iters); +} + +void benchmarkWeakPtrLock(size_t numThreads, size_t iters) { + auto p = std::make_shared(1); + std::weak_ptr wp = p; + parallelRun([&] { return wp.lock(); }, numThreads, iters); +} + +void benchmarkCoreCachedSharedPtrGet(size_t numThreads, size_t iters) { + folly::CoreCachedSharedPtr p(std::make_shared(1)); + parallelRun([&] { return p.get(); }, numThreads, iters); +} + +void benchmarkCoreCachedWeakPtrLock(size_t numThreads, size_t iters) { + folly::CoreCachedSharedPtr p(std::make_shared(1)); + folly::CoreCachedWeakPtr wp(p); + parallelRun([&] { return wp.get().lock(); }, numThreads, iters); +} + +} // namespace + +BENCHMARK(SharedPtrSingleThread, n) { + benchmarkSharedPtrCopy(1, n); +} +BENCHMARK(WeakPtrSingleThread, n) { + benchmarkWeakPtrLock(1, n); +} +BENCHMARK(CoreCachedSharedPtrSingleThread, n) { + benchmarkCoreCachedSharedPtrGet(1, n); +} +BENCHMARK(CoreCachedWeakPtrSingleThread, n) { + benchmarkCoreCachedWeakPtrLock(1, n); +} + +BENCHMARK_DRAW_LINE(); + +BENCHMARK(SharedPtr4Threads, n) { + benchmarkSharedPtrCopy(4, n); +} +BENCHMARK(WeakPtr4Threads, n) { + benchmarkWeakPtrLock(4, n); +} +BENCHMARK(CoreCachedSharedPtr4Threads, n) { + benchmarkCoreCachedSharedPtrGet(4, n); +} +BENCHMARK(CoreCachedWeakPtr4Threads, n) { + benchmarkCoreCachedWeakPtrLock(4, n); +} + +BENCHMARK_DRAW_LINE(); + +BENCHMARK(SharedPtr16Threads, n) { + benchmarkSharedPtrCopy(16, n); +} +BENCHMARK(WeakPtr16Threads, n) { + benchmarkWeakPtrLock(16, n); +} +BENCHMARK(CoreCachedSharedPtr16Threads, n) { + benchmarkCoreCachedSharedPtrGet(16, n); +} +BENCHMARK(CoreCachedWeakPtr16Threads, n) { + benchmarkCoreCachedWeakPtrLock(16, n); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + gflags::ParseCommandLineFlags(&argc, &argv, true); + + auto ret = RUN_ALL_TESTS(); + if (ret == 0 && FLAGS_benchmark) { + folly::runBenchmarks(); + } + + return ret; +} diff --git a/folly/experimental/test/ReadMostlySharedPtrBenchmark.cpp b/folly/experimental/test/ReadMostlySharedPtrBenchmark.cpp index b1139f24..a2e2dcc3 100644 --- a/folly/experimental/test/ReadMostlySharedPtrBenchmark.cpp +++ b/folly/experimental/test/ReadMostlySharedPtrBenchmark.cpp @@ -36,9 +36,11 @@ void benchmark(size_t n) { for (size_t t = 0; t < threadCount; ++t) { ts.emplace_back([&]() { WeakPtr weakPtr(mainPtr); + // Prevent the compiler from hoisting code out of the loop. + auto op = [&]() FOLLY_NOINLINE { weakPtr.lock(); }; for (size_t i = 0; i < n; ++i) { - weakPtr.lock(); + op(); } }); } -- 2.34.1