2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/ThreadLocal.h>
20 #include <sys/types.h>
27 #include <condition_variable>
33 #include <unordered_map>
35 #include <boost/thread/tss.hpp>
36 #include <gflags/gflags.h>
37 #include <glog/logging.h>
38 #include <gtest/gtest.h>
40 #include <folly/Benchmark.h>
41 #include <folly/Baton.h>
42 #include <folly/experimental/io/FsUtil.h>
44 using namespace folly;
// Custom ThreadLocalPtr deleter for tests: adds 1000 to the shared counter
// only when invoked in ALL_THREADS mode (i.e. the ThreadLocalPtr itself is
// being destroyed), letting tests distinguish that from per-thread cleanup.
// NOTE(review): the enclosing Widget class body is elided in this excerpt
// (the embedded original line numbers are non-contiguous).
53 static void customDeleter(Widget* w, TLPDestructionMode mode) {
54 totalVal_ += (mode == TLPDestructionMode::ALL_THREADS) * 1000;
// Out-of-class definition of the counter shared by all Widget tests.
58 int Widget::totalVal_ = 0;
// A Widget stored via ThreadLocalPtr must be destroyed when its owning thread
// exits; the final counter value 10 presumably comes from the (elided) Widget
// destructor folding val_ into totalVal_ — TODO confirm against full source.
60 TEST(ThreadLocalPtr, BasicDestructor) {
61 Widget::totalVal_ = 0;
62 ThreadLocalPtr<Widget> w;
// NOTE(review): elided lines likely wrap this reset in a short-lived thread.
64 w.reset(new Widget());
67 EXPECT_EQ(10, Widget::totalVal_);
// Same as BasicDestructor but with a custom deleter. Thread-exit destruction
// uses THIS_THREAD mode, so customDeleter adds no 1000 bonus and the counter
// stays at 10 across both checks.
70 TEST(ThreadLocalPtr, CustomDeleter1) {
71 Widget::totalVal_ = 0;
73 ThreadLocalPtr<Widget> w;
75 w.reset(new Widget(), Widget::customDeleter);
// First check: after the (elided) worker thread has exited.
78 EXPECT_EQ(10, Widget::totalVal_);
// Second check: value unchanged afterwards — no double destruction.
80 EXPECT_EQ(10, Widget::totalVal_);
// reset(nullptr) semantics: an empty ThreadLocalPtr is falsy; after an
// (elided) reset(new int(4)) it is truthy and dereferences to 4.
83 TEST(ThreadLocalPtr, resetNull) {
84 ThreadLocalPtr<int> tl;
87 EXPECT_TRUE(static_cast<bool>(tl));
88 EXPECT_EQ(*tl.get(), 4);
// release() must transfer ownership out of the ThreadLocalPtr: after the
// worker thread releases the Widget into wPtr, thread exit destroys nothing
// (counter still 0); destruction happens only when wPtr is reset/destroyed.
93 TEST(ThreadLocalPtr, TestRelease) {
94 Widget::totalVal_ = 0;
95 ThreadLocalPtr<Widget> w;
96 std::unique_ptr<Widget> wPtr;
97 std::thread([&w, &wPtr]() {
98 w.reset(new Widget());
// Ownership moves to the unique_ptr before the thread exits.
101 wPtr.reset(w.release());
// Thread has exited (join elided) but the Widget is still alive in wPtr.
103 EXPECT_EQ(0, Widget::totalVal_);
// After (elided) wPtr.reset(): destructor finally runs.
105 EXPECT_EQ(10, Widget::totalVal_);
// Regression test: creating a NEW ThreadLocal value from inside another
// thread-local's destruction callback must still be cleaned up (no leak).
108 TEST(ThreadLocalPtr, CreateOnThreadExit) {
109 Widget::totalVal_ = 0;
110 ThreadLocal<Widget> w;
111 ThreadLocalPtr<int> tl;
// The deleter below runs at thread exit and touches a fresh ThreadLocal.
114 tl.reset(new int(1), [&] (int* ptr, TLPDestructionMode mode) {
116 // This test ensures Widgets allocated here are not leaked.
118 ThreadLocal<Widget> wl;
// Both Widgets (w's and wl's) must have been destroyed — hence 2.
122 EXPECT_EQ(2, Widget::totalVal_);
125 // Test deleting the ThreadLocalPtr object
// Handshake via a (elided) State enum + mutex/cv: worker sets its value,
// signals DONE, then blocks until the main thread signals EXIT. In between,
// the main thread destroys the ThreadLocalPtr itself, which must run the
// custom deleter in ALL_THREADS mode (+1000, per customDeleter above).
126 TEST(ThreadLocalPtr, CustomDeleter2) {
127 Widget::totalVal_ = 0;
130 std::condition_variable cv;
136 State state = State::START;
138 ThreadLocalPtr<Widget> w;
139 t = std::thread([&]() {
140 w.reset(new Widget(), Widget::customDeleter);
143 // Notify main thread that we're done
145 std::unique_lock<std::mutex> lock(mutex);
150 // Wait for main thread to allow us to exit
152 std::unique_lock<std::mutex> lock(mutex);
153 while (state != State::EXIT) {
159 // Wait for main thread to start (and set w.get()->val_)
161 std::unique_lock<std::mutex> lock(mutex);
162 while (state != State::DONE) {
167 // Thread started but hasn't exited yet
168 EXPECT_EQ(0, Widget::totalVal_);
170 // Destroy ThreadLocalPtr<Widget> (by letting it go out of scope)
// 1010 = 10 (presumably the widget's val_) + 1000 (ALL_THREADS deleter).
173 EXPECT_EQ(1010, Widget::totalVal_);
175 // Allow thread to exit
177 std::unique_lock<std::mutex> lock(mutex);
// Counter unchanged after the thread exits: the slot was already destroyed.
183 EXPECT_EQ(1010, Widget::totalVal_);
// ThreadLocal (auto-constructing variant): first access in the worker thread
// default-constructs a Widget; thread exit destroys it, folding val_ (10)
// into the shared counter.
186 TEST(ThreadLocal, BasicDestructor) {
187 Widget::totalVal_ = 0;
188 ThreadLocal<Widget> w;
189 std::thread([&w]() { w->val_ += 10; }).join();
190 EXPECT_EQ(10, Widget::totalVal_);
// Two successive ThreadLocal<Widget> instances in the same thread: each one's
// value must be destroyed independently (10 + 10 = 20).
// NOTE(review): the two declarations are presumably in separate scopes or
// separate (elided) threads — interior lines are missing from this excerpt.
193 TEST(ThreadLocal, SimpleRepeatDestructor) {
194 Widget::totalVal_ = 0;
196 ThreadLocal<Widget> w;
200 ThreadLocal<Widget> w;
203 EXPECT_EQ(20, Widget::totalVal_);
// Stress the interleaving of ThreadLocal object destruction with live threads:
// the main loop repeatedly replaces the ThreadLocal instance ("wVersion")
// while a worker thread keeps accessing it under a lock; each fresh version
// must start value-initialized in the worker.
206 TEST(ThreadLocal, InterleavedDestructors) {
207 Widget::totalVal_ = 0;
208 std::unique_ptr<ThreadLocal<Widget>> w;
210 const int wVersionMax = 2;
213 auto th = std::thread([&]() {
214 int wVersionPrev = 0;
217 std::lock_guard<std::mutex> g(lock);
// Sentinel: main thread sets wVersion past the max to tell us to stop.
218 if (wVersion > wVersionMax) {
221 if (wVersion > wVersionPrev) {
222 // We have a new version of w, so it should be initialized to zero
223 EXPECT_EQ((*w)->val_, 0);
227 std::lock_guard<std::mutex> g(lock);
228 wVersionPrev = wVersion;
233 FOR_EACH_RANGE(i, 0, wVersionMax) {
236 std::lock_guard<std::mutex> g(lock);
// Replacing the ThreadLocal destroys the previous version's values.
238 w.reset(new ThreadLocal<Widget>());
242 std::lock_guard<std::mutex> g(lock);
243 if (thIter > thIterPrev) {
249 std::lock_guard<std::mutex> g(lock);
// Signal the worker to exit (see sentinel check above).
250 wVersion = wVersionMax + 1;
// Each version's widget contributed 10 on destruction.
253 EXPECT_EQ(wVersionMax * 10, Widget::totalVal_);
// Minimal thread-cached counter: each thread bumps its own ThreadLocal slot;
// a read presumably sums across all threads via accessAllThreads().
// NOTE(review): most of the class body is elided in this excerpt.
256 class SimpleThreadCachedInt {
259 ThreadLocal<int,NewTag> val_;
// Inside read(): iterate every thread's instance and accumulate — the loop
// body and the surrounding method signature are elided here.
268 for (const auto& i : val_.accessAllThreads()) {
// accessAllThreads() must observe the contributions of every live thread:
// spin up kNumThreads workers, each adds 1 (elided) and parks; once all have
// checked in via totalAtomic, the cross-thread read must equal kNumThreads.
275 TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
276 const int kNumThreads = 10;
277 SimpleThreadCachedInt stci;
278 std::atomic<bool> run(true);
279 std::atomic<int> totalAtomic(0);
280 std::vector<std::thread> threads;
281 for (int i = 0; i < kNumThreads; ++i) {
282 threads.push_back(std::thread([&,i]() {
284 totalAtomic.fetch_add(1);
// Park until the main thread has finished reading.
285 while (run.load()) { usleep(100); }
// Wait for all workers to have incremented before summing.
288 while (totalAtomic.load() != kNumThreads) { usleep(100); }
289 EXPECT_EQ(kNumThreads, stci.read());
// (run is cleared in an elided line) then join all workers.
291 for (auto& t : threads) {
// ThreadLocal reset semantics: an (elided) reset(nullptr) between lines 299
// and 301 clears the slot, after which get() lazily default-constructs a
// fresh int (hence 0); a later reset installs a new value normally.
296 TEST(ThreadLocal, resetNull) {
298 tl.reset(new int(4));
299 EXPECT_EQ(*tl.get(), 4);
301 EXPECT_EQ(*tl.get(), 0);
302 tl.reset(new int(5));
303 EXPECT_EQ(*tl.get(), 5);
// Member of an elided struct (presumably `Foo`, used by the Movable tests):
// a tagged ThreadLocal that each Foo instance carries.
310 folly::ThreadLocal<int, Tag> tl;
// Moving a Foo must not alias the ThreadLocal slots: a and b keep distinct
// per-instance values both before and after the (elided) move.
314 TEST(ThreadLocal, Movable1) {
317 EXPECT_TRUE(a.tl.get() != b.tl.get());
321 EXPECT_TRUE(a.tl.get() != b.tl.get());
// Same property through container moves: four Foo instances stored in a map
// (insertion elided) must still own four distinct ThreadLocal slots.
324 TEST(ThreadLocal, Movable2) {
325 std::map<int, Foo> map;
// Collect each instance's slot pointer into a set (set decl elided).
333 for (auto& m : map) {
334 tls.insert(m.second.tl.get());
337 // Make sure that we have 4 different instances of *tl
338 EXPECT_EQ(4, tls.size());
// Size (in uint64_t words) of each FillObject's payload.
343 constexpr size_t kFillObjectSize = 300;
// Global tally of destroyed FillObjects, checked by the Stress test.
345 std::atomic<uint64_t> gDestroyed;
348 * Fill a chunk of memory with a unique-ish pattern that includes the thread id
349 * (so deleting one of these from another thread would cause a failure)
351 * Verify it explicitly and on destruction.
// NOTE(review): class header, destructor, and check() signature are elided.
355 explicit FillObject(uint64_t idx) : idx_(idx) {
// Stamp every word with the thread-id-derived pattern from val().
357 for (size_t i = 0; i < kFillObjectSize; ++i) {
// Verification loop (presumably in check()/the destructor): every word must
// still match — a cross-thread delete would use a different pthread_self().
364 for (size_t i = 0; i < kFillObjectSize; ++i) {
365 CHECK_EQ(v, data_[i]);
374 uint64_t val() const {
// Pattern: index in the high bits, current thread id in the low bits.
375 return (idx_ << 40) | uint64_t(pthread_self());
379 uint64_t data_[kFillObjectSize];
384 #if FOLLY_HAVE_STD_THIS_THREAD_SLEEP_FOR
// Stress test: many threads repeatedly reset many ThreadLocalPtr slots with
// freshly-stamped FillObjects; the pattern check in FillObject catches any
// cross-thread destruction, and gDestroyed must account for every object.
385 TEST(ThreadLocal, Stress) {
386 constexpr size_t numFillObjects = 250;
387 std::array<ThreadLocalPtr<FillObject>, numFillObjects> objects;
389 constexpr size_t numThreads = 32;
390 constexpr size_t numReps = 20;
392 std::vector<std::thread> threads;
393 threads.reserve(numThreads);
395 for (size_t i = 0; i < numThreads; ++i) {
396 threads.emplace_back([&objects] {
397 for (size_t rep = 0; rep < numReps; ++rep) {
398 for (size_t i = 0; i < objects.size(); ++i) {
// Each reset destroys the previous FillObject in THIS thread.
399 objects[i].reset(new FillObject(rep * objects.size() + i));
400 std::this_thread::sleep_for(std::chrono::microseconds(100));
// Second pass (body elided): presumably checks and/or clears each slot.
402 for (size_t i = 0; i < objects.size(); ++i) {
409 for (auto& t : threads) {
// Every object created must have been destroyed exactly once.
413 EXPECT_EQ(numFillObjects * numThreads * numReps, gDestroyed);
417 // Yes, threads and fork don't mix
418 // (http://cppwisdom.quora.com/Why-threads-and-fork-dont-mix) but if you're
419 // stupid or desperate enough to try, we shouldn't stand in your way.
// NOTE(review): struct header elided — this is the body of `HoldsOne`, a
// sentinel type whose per-thread instances each contribute 1 to totalValue().
423 HoldsOne() : value_(1) { }
424 // Do an actual access to catch the buggy case where this == nullptr
425 int value() const { return value_; }
// Tag type so these fork tests get their own ThreadLocal namespace.
430 struct HoldsOneTag {};
// File-scope tagged ThreadLocal used by the Fork test below.
432 ThreadLocal<HoldsOne, HoldsOneTag> ptr;
// Inside (elided) totalValue(): sum value() across all threads' instances.
436 for (auto& p : ptr.accessAllThreads()) {
444 #ifdef FOLLY_HAVE_PTHREAD_ATFORK
// After fork(), only the forking thread survives in the child, so the child
// must see totalValue() == 1 even though the parent has two threads (== 2).
// The child reports this back through its exit status.
445 TEST(ThreadLocal, Fork) {
446 EXPECT_EQ(1, ptr->value()); // ensure created
447 EXPECT_EQ(1, totalValue());
448 // Spawn a new thread
451 bool started = false;
452 std::condition_variable startedCond;
453 bool stopped = false;
454 std::condition_variable stoppedCond;
456 std::thread t([&] () {
457 EXPECT_EQ(1, ptr->value()); // ensure created
459 std::unique_lock<std::mutex> lock(mutex);
461 startedCond.notify_all();
464 std::unique_lock<std::mutex> lock(mutex);
// Park the helper thread until the fork-related checks are done.
466 stoppedCond.wait(lock);
472 std::unique_lock<std::mutex> lock(mutex);
474 startedCond.wait(lock);
// Parent now has two threads each holding a HoldsOne.
478 EXPECT_EQ(2, totalValue());
// In the child branch of the (elided) fork():
483 int v = totalValue();
485 // exit successfully if v == 1 (one thread)
486 // diagnostic error code otherwise :)
492 } else if (pid > 0) {
// Parent: reap the child and require a clean (status 0) exit.
495 EXPECT_EQ(pid, waitpid(pid, &status, 0));
496 EXPECT_TRUE(WIFEXITED(status));
497 EXPECT_EQ(0, WEXITSTATUS(status));
499 EXPECT_TRUE(false) << "fork failed";
// Parent state is unaffected by the fork.
502 EXPECT_EQ(2, totalValue());
505 std::unique_lock<std::mutex> lock(mutex);
// Release the helper thread (join elided).
507 stoppedCond.notify_all();
// Back to one thread; its HoldsOne instance remains.
512 EXPECT_EQ(1, totalValue());
// Separate tag so this test's ThreadLocal state is independent of Fork's.
516 struct HoldsOneTag2 {};
// Regression test for a child-side deadlock (see comment below): touch the
// tag from a non-forking thread, then fork and destroy a same-tag ThreadLocal
// in the child; the child must exit cleanly rather than hang.
518 TEST(ThreadLocal, Fork2) {
519 // A thread-local tag that was used in the parent from a *different* thread
520 // (but not the forking thread) would cause the child to hang in a
521 // ThreadLocalPtr's object destructor. Yeah.
522 ThreadLocal<HoldsOne, HoldsOneTag2> p;
524 // use tag in different thread
525 std::thread t([&p] { p.get(); });
// Child branch of the (elided) fork(): constructing and destroying q must
// not deadlock on state inherited from the parent's other thread.
531 ThreadLocal<HoldsOne, HoldsOneTag2> q;
535 } else if (pid > 0) {
537 EXPECT_EQ(pid, waitpid(pid, &status, 0));
538 EXPECT_TRUE(WIFEXITED(status));
539 EXPECT_EQ(0, WEXITSTATUS(status));
541 EXPECT_TRUE(false) << "fork failed";
// ThreadLocal state owned by a dlopen()'d shared library must stay usable
// while the library is loaded (and presumably survive/handle dlclose — the
// latter part of the test is elided). Batons sequence the two worker threads
// against the main thread's dlopen/dlclose steps.
545 TEST(ThreadLocal, SharedLibrary) {
546 auto exe = fs::executable_path();
// Companion test library expected next to the test binary.
547 auto lib = exe.parent_path() / "lib_thread_local_test.so";
548 auto handle = dlopen(lib.string().c_str(), RTLD_LAZY);
549 EXPECT_NE(nullptr, handle);
551 typedef void (*useA_t)();
552 useA_t useA = (useA_t) dlsym(handle, "useA");
555 const char *dlsym_error = dlerror();
556 EXPECT_EQ(nullptr, dlsym_error);
// Two rendezvous pairs: b11/b12 for t1, b21/b22 for t2.
560 folly::Baton<> b11, b12, b21, b22;
562 std::thread t1([&]() {
568 std::thread t2([&]() {
// Compile-time check: PthreadKeyUnregister must be constexpr-constructible so
// it can be used in constant-initialized (static) contexts.
586 namespace folly { namespace threadlocal_detail {
587 struct PthreadKeyUnregisterTester {
588 PthreadKeyUnregister p;
589 constexpr PthreadKeyUnregisterTester() = default;
// The "test" is that this line compiles; no runtime assertions needed.
593 TEST(ThreadLocal, UnregisterClassHasConstExprCtor) {
594 folly::threadlocal_detail::PthreadKeyUnregisterTester x;
599 // clang is unable to compile this code unless in c++14 mode.
600 #if __cplusplus >= 201402L
602 // This will fail to compile unless ThreadLocal{Ptr} has a constexpr
603 // default constructor. This ensures that ThreadLocal is safe to use in
604 // static constructors without worrying about initialization order
// Compile-only test: member(s) include a ThreadLocalPtr (a ThreadLocal member
// is elided); the constexpr ctor forces constant-initializability.
605 class ConstexprThreadLocalCompile {
607 ThreadLocalPtr<int> b_;
609 constexpr ConstexprThreadLocalCompile() {}
614 // Simple reference implementation using pthread_get_specific
// Baseline for the benchmarks below: raw pthread TSD with a destructor that
// deletes the stored T at thread exit. (Template header and get()/reset()
// signatures are elided in this excerpt.)
616 class PThreadGetSpecific {
618 PThreadGetSpecific() : key_(0) {
// OnThreadExit is registered as the key's per-thread destructor.
619 pthread_key_create(&key_, OnThreadExit);
623 return static_cast<T*>(pthread_getspecific(key_));
// NOTE(review): unlike ThreadLocalPtr::reset, this does not appear to delete
// any previously stored value — acceptable for a benchmark harness.
628 pthread_setspecific(key_, t);
630 static void OnThreadExit(void* obj) {
631 delete static_cast<T*>(obj);
// Number of worker threads each multi-threaded benchmark spawns.
637 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.")
640 BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) { \
641 const int itersPerThread = iters / FLAGS_numThreads; \
642 std::vector<std::thread> threads; \
643 for (int i = 0; i < FLAGS_numThreads; ++i) { \
644 threads.push_back(std::thread([&]() { \
645 var.reset(new int(0)); \
646 for (int i = 0; i < itersPerThread; ++i) { \
651 for (auto& t : threads) { \
// Benchmark subjects: folly ThreadLocalPtr vs. raw pthread TSD vs. boost TSS
// (each presumably registered via an elided REG(...) invocation).
656 ThreadLocalPtr<int> tlp;
658 PThreadGetSpecific<int> pthread_get_specific;
659 REG(pthread_get_specific);
660 boost::thread_specific_ptr<int> boost_tsp;
662 BENCHMARK_DRAW_LINE();
// Driver: initialize gtest and gflags, raise the default benchmark iteration
// cap (only if the user didn't set it), optionally run the benchmarks, then
// run the test suite.
664 int main(int argc, char** argv) {
665 testing::InitGoogleTest(&argc, argv);
666 gflags::ParseCommandLineFlags(&argc, &argv, true);
667 gflags::SetCommandLineOptionWithMode(
668 "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT
// --benchmark flag is provided by folly/Benchmark.h.
670 if (FLAGS_benchmark) {
671 folly::runBenchmarks();
673 return RUN_ALL_TESTS();
677 Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches
679 Benchmark Iters Total t t/iter iter/sec
680 ------------------------------------------------------------------------------
681 * BM_mt_tlp 100000000 39.88 ms 398.8 ps 2.335 G
682 +5.91% BM_mt_pthread_get_specific 100000000 42.23 ms 422.3 ps 2.205 G
683 + 295% BM_mt_boost_tsp 100000000 157.8 ms 1.578 ns 604.5 M
684 ------------------------------------------------------------------------------