2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/ThreadLocal.h>
19 #include <sys/types.h>
26 #include <condition_variable>
32 #include <unordered_map>
34 #include <boost/thread/tss.hpp>
35 #include <gflags/gflags.h>
36 #include <glog/logging.h>
37 #include <gtest/gtest.h>
39 #include <folly/Benchmark.h>
41 using namespace folly;
// Deleter with the (pointer, TLPDestructionMode) signature; it is handed to
// ThreadLocalPtr::reset() by the CustomDeleter tests in this file.
// Adds 1000 to totalVal_ when mode is ALL_THREADS and 0 otherwise (the bool
// result of the comparison is used as a 0/1 multiplier).
50 static void customDeleter(Widget* w, TLPDestructionMode mode) {
51 totalVal_ += (mode == TLPDestructionMode::ALL_THREADS) * 1000;
// Out-of-class definition of the static counter that the tests reset to 0 and
// then inspect.
55 int Widget::totalVal_ = 0;
// Zeroes the shared counter, stores a freshly allocated Widget in a
// ThreadLocalPtr and finally expects Widget::totalVal_ to be 10.
// NOTE(review): presumably the Widget's val_ is set to 10 and its destructor
// folds val_ into totalVal_ -- the Widget class is not shown here; confirm.
57 TEST(ThreadLocalPtr, BasicDestructor) {
58 Widget::totalVal_ = 0;
59 ThreadLocalPtr<Widget> w;
61 w.reset(new Widget());
64 EXPECT_EQ(10, Widget::totalVal_);
// Same idea as the basic destructor test, but the Widget is installed together
// with Widget::customDeleter. The counter is expected to read 10 at both
// checkpoints, i.e. the deleter's +1000 (ALL_THREADS) contribution must not
// have been applied at either point.
// NOTE(review): presumably the first check follows the owning thread's exit
// and the second follows destruction of `w` -- confirm against the full test.
67 TEST(ThreadLocalPtr, CustomDeleter1) {
68 Widget::totalVal_ = 0;
70 ThreadLocalPtr<Widget> w;
72 w.reset(new Widget(), Widget::customDeleter);
75 EXPECT_EQ(10, Widget::totalVal_);
77 EXPECT_EQ(10, Widget::totalVal_);
// Checks the bool conversion and the stored value of a ThreadLocalPtr<int>:
// `tl` must convert to true and *tl.get() must be 4.
// NOTE(review): the reset() call that installs the 4 is not among the lines
// shown here -- confirm against the full test.
80 TEST(ThreadLocalPtr, resetNull) {
81 ThreadLocalPtr<int> tl;
84 EXPECT_TRUE(static_cast<bool>(tl));
85 EXPECT_EQ(*tl.get(), 4);
// Exercises ThreadLocalPtr::release(): a helper thread (capturing `w` and
// `wPtr` by reference) installs a Widget and then hands the raw pointer
// returned by release() over to the unique_ptr `wPtr`.
// The counter is expected to be 0 at the first check and 10 at the second.
// NOTE(review): presumably 0 is observed while wPtr still owns the Widget and
// 10 once wPtr has destroyed it -- confirm against the full test.
90 TEST(ThreadLocalPtr, TestRelease) {
91 Widget::totalVal_ = 0;
92 ThreadLocalPtr<Widget> w;
93 std::unique_ptr<Widget> wPtr;
94 std::thread([&w, &wPtr]() {
95 w.reset(new Widget());
98 wPtr.reset(w.release());
100 EXPECT_EQ(0, Widget::totalVal_);
102 EXPECT_EQ(10, Widget::totalVal_);
// Covers thread-local objects that are created while a thread is exiting.
// An int is installed in `tl` with a custom deleter lambda (capturing by
// reference); inside that deleter a ThreadLocal<Widget> is declared, and per
// the comment below the Widgets allocated there must not leak.
// The test finally expects Widget::totalVal_ to be 2.
105 TEST(ThreadLocalPtr, CreateOnThreadExit) {
106 Widget::totalVal_ = 0;
107 ThreadLocal<Widget> w;
108 ThreadLocalPtr<int> tl;
111 tl.reset(new int(1), [&] (int* ptr, TLPDestructionMode mode) {
113 // This test ensures Widgets allocated here are not leaked.
115 ThreadLocal<Widget> wl;
119 EXPECT_EQ(2, Widget::totalVal_);
122 // Test deleting the ThreadLocalPtr object
// The worker thread and the main thread are sequenced through a mutex, a
// condition variable and a `state` variable (initially START; DONE and EXIT
// are tested below). Expected counter values: 0 while the worker is alive and
// `w` exists, 1010 once `w` has been destroyed, and still 1010 after the
// worker has exited (i.e. the Widget is not destroyed a second time).
// The 1000 part matches customDeleter being invoked with ALL_THREADS;
// NOTE(review): the remaining 10 presumably comes from the Widget's val_.
123 TEST(ThreadLocalPtr, CustomDeleter2) {
124 Widget::totalVal_ = 0;
127 std::condition_variable cv;
133 State state = State::START;
135 ThreadLocalPtr<Widget> w;
136 t = std::thread([&]() {
137 w.reset(new Widget(), Widget::customDeleter);
140 // Notify main thread that we're done
142 std::unique_lock<std::mutex> lock(mutex);
147 // Wait for main thread to allow us to exit
149 std::unique_lock<std::mutex> lock(mutex);
150 while (state != State::EXIT) {
156 // Wait for the worker thread to signal DONE (after it set w.get()->val_)
158 std::unique_lock<std::mutex> lock(mutex);
159 while (state != State::DONE) {
164 // Thread started but hasn't exited yet
165 EXPECT_EQ(0, Widget::totalVal_);
167 // Destroy ThreadLocalPtr<Widget> (by letting it go out of scope)
170 EXPECT_EQ(1010, Widget::totalVal_);
172 // Allow thread to exit
174 std::unique_lock<std::mutex> lock(mutex);
180 EXPECT_EQ(1010, Widget::totalVal_);
// ThreadLocal<Widget> variant of the basic destructor test: a short-lived
// thread adds 10 to its own Widget's val_ and is joined, after which the
// shared counter is expected to be 10.
// NOTE(review): relies on ~Widget adding val_ to totalVal_ at thread exit --
// the Widget class is not shown here; confirm.
183 TEST(ThreadLocal, BasicDestructor) {
184 Widget::totalVal_ = 0;
185 ThreadLocal<Widget> w;
186 std::thread([&w]() { w->val_ += 10; }).join();
187 EXPECT_EQ(10, Widget::totalVal_);
// Declares a ThreadLocal<Widget> named `w` twice (presumably in two separate
// nested scopes, since the name is reused) and expects the counter to have
// accumulated 20 in total by the end.
190 TEST(ThreadLocal, SimpleRepeatDestructor) {
191 Widget::totalVal_ = 0;
193 ThreadLocal<Widget> w;
197 ThreadLocal<Widget> w;
200 EXPECT_EQ(20, Widget::totalVal_);
// Interleaves re-creation of a heap-allocated ThreadLocal<Widget> on the main
// thread with accesses from a long-lived worker thread.
// Shared state (wVersion, thIter, and the ThreadLocal pointer itself) is
// accessed under `lock`. The worker stops once wVersion exceeds wVersionMax,
// which the main thread arranges at the end by setting wVersionMax + 1.
// Final expectation: the counter equals wVersionMax * 10.
203 TEST(ThreadLocal, InterleavedDestructors) {
204 Widget::totalVal_ = 0;
205 std::unique_ptr<ThreadLocal<Widget>> w;
207 const int wVersionMax = 2;
210 auto th = std::thread([&]() {
211 int wVersionPrev = 0;
214 std::lock_guard<std::mutex> g(lock);
215 if (wVersion > wVersionMax) {
218 if (wVersion > wVersionPrev) {
219 // We have a new version of w, so it should be initialized to zero
220 EXPECT_EQ((*w)->val_, 0);
224 std::lock_guard<std::mutex> g(lock);
225 wVersionPrev = wVersion;
// Main thread: one iteration per version, each replacing the ThreadLocal.
230 FOR_EACH_RANGE(i, 0, wVersionMax) {
233 std::lock_guard<std::mutex> g(lock);
235 w.reset(new ThreadLocal<Widget>());
239 std::lock_guard<std::mutex> g(lock);
240 if (thIter > thIterPrev) {
// Publish a version past the maximum so the worker's exit check fires.
246 std::lock_guard<std::mutex> g(lock);
247 wVersion = wVersionMax + 1;
250 EXPECT_EQ(wVersionMax * 10, Widget::totalVal_);
// Takes the address of StaticMeta<void>::EntryID::kInvalid, which odr-uses the
// constant and therefore requires it to have a definition, then checks that
// its value is the maximum uint32_t.
253 TEST(ThreadLocalPtr, ODRUseEntryIDkInvalid) {
254 // EntryID::kInvalid is odr-used
255 // see http://en.cppreference.com/w/cpp/language/static
256 const uint32_t* pInvalid =
257 &(threadlocal_detail::StaticMeta<void>::EntryID::kInvalid);
258 EXPECT_EQ(std::numeric_limits<uint32_t>::max(), *pInvalid);
// Helper that keeps one int per thread in a ThreadLocal keyed by NewTag.
261 class SimpleThreadCachedInt {
264 ThreadLocal<int,NewTag> val_;
// Visits the int of every thread through accessAllThreads().
// NOTE(review): the AccessAllThreadsCounter test expects read() to equal the
// number of threads, so this loop presumably sums the per-thread values --
// confirm against the full body.
273 for (const auto& i : val_.accessAllThreads()) {
// Starts kNumThreads threads; each one bumps `totalAtomic` and then polls
// `run` every 100 microseconds so that it stays alive. The main thread polls
// until all threads have checked in and then expects stci.read() to equal
// kNumThreads, i.e. the values of all live threads are visible at once.
// NOTE(review): each thread presumably also updates `stci` on a line not shown
// here, and `run` is presumably cleared before the final loop over `threads`.
280 TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
281 const int kNumThreads = 10;
282 SimpleThreadCachedInt stci;
283 std::atomic<bool> run(true);
284 std::atomic<int> totalAtomic(0);
285 std::vector<std::thread> threads;
286 for (int i = 0; i < kNumThreads; ++i) {
287 threads.push_back(std::thread([&,i]() {
289 totalAtomic.fetch_add(1);
290 while (run.load()) { usleep(100); }
293 while (totalAtomic.load() != kNumThreads) { usleep(100); }
294 EXPECT_EQ(kNumThreads, stci.read());
296 for (auto& t : threads) {
// Checks the value seen through tl.get() across resets: 4 after the first
// reset, 0 at the middle checkpoint, and 5 after the last reset.
// NOTE(review): the declaration of `tl` and the step that leads to the 0
// (presumably a reset to null followed by default re-creation on access) are
// not among the lines shown here -- confirm against the full test.
301 TEST(ThreadLocal, resetNull) {
303 tl.reset(new int(4));
304 EXPECT_EQ(*tl.get(), 4);
306 EXPECT_EQ(*tl.get(), 0);
307 tl.reset(new int(5));
308 EXPECT_EQ(*tl.get(), 5);
// Thread-local int keyed by Tag; the Movable tests reach it as `.tl` on their
// Foo objects.
315 folly::ThreadLocal<int, Tag> tl;
// Asserts at two points that objects `a` and `b` expose different underlying
// thread-local instances (a.tl.get() != b.tl.get()).
// NOTE(review): presumably the second check comes after a move/swap between
// `a` and `b`; those lines are not shown here.
319 TEST(ThreadLocal, Movable1) {
322 EXPECT_TRUE(a.tl.get() != b.tl.get());
326 EXPECT_TRUE(a.tl.get() != b.tl.get());
// Stores Foo objects as values of a std::map, collects the address returned by
// each element's tl.get() into `tls`, and expects 4 distinct addresses.
// NOTE(review): the insertion of the four map entries and the declaration of
// `tls` are not among the lines shown here.
329 TEST(ThreadLocal, Movable2) {
330 std::map<int, Foo> map;
338 for (auto& m : map) {
339 tls.insert(m.second.tl.get());
342 // Make sure that we have 4 different instances of *tl
343 EXPECT_EQ(4, tls.size());
// Number of uint64_t words in each FillObject's data_ array.
348 constexpr size_t kFillObjectSize = 300;
// Counter compared against the expected number of destroyed FillObjects at the
// end of the Stress test.
350 std::atomic<uint64_t> gDestroyed;
353 * Fill a chunk of memory with a unique-ish pattern that includes the thread id
354 * (so deleting one of these from another thread would cause a failure)
356 * Verify it explicitly and on destruction.
360 explicit FillObject(uint64_t idx) : idx_(idx) {
362 for (size_t i = 0; i < kFillObjectSize; ++i) {
// Verification loop: every word of data_ must equal the expected value `v`;
// CHECK_EQ aborts the process on a mismatch.
369 for (size_t i = 0; i < kFillObjectSize; ++i) {
370 CHECK_EQ(v, data_[i]);
// Pattern value: the object index shifted into the bits above bit 40, OR-ed
// with the calling thread's pthread_self() converted to uint64_t.
379 uint64_t val() const {
380 return (idx_ << 40) | uint64_t(pthread_self());
384 uint64_t data_[kFillObjectSize];
// Compiled only when FOLLY_HAVE_STD_THIS_THREAD_SLEEP_FOR is set.
// 32 threads each run 20 repetitions over an array of 250
// ThreadLocalPtr<FillObject>; every slot is reset to a new FillObject whose
// index is rep * objects.size() + i, with a 100-microsecond sleep after the
// reset. At the end the number of destroyed objects recorded in gDestroyed
// must equal numFillObjects * numThreads * numReps.
389 #if FOLLY_HAVE_STD_THIS_THREAD_SLEEP_FOR
390 TEST(ThreadLocal, Stress) {
391 constexpr size_t numFillObjects = 250;
392 std::array<ThreadLocalPtr<FillObject>, numFillObjects> objects;
394 constexpr size_t numThreads = 32;
395 constexpr size_t numReps = 20;
397 std::vector<std::thread> threads;
398 threads.reserve(numThreads);
400 for (size_t i = 0; i < numThreads; ++i) {
401 threads.emplace_back([&objects] {
402 for (size_t rep = 0; rep < numReps; ++rep) {
403 for (size_t i = 0; i < objects.size(); ++i) {
404 objects[i].reset(new FillObject(rep * objects.size() + i));
405 std::this_thread::sleep_for(std::chrono::microseconds(100));
407 for (size_t i = 0; i < objects.size(); ++i) {
414 for (auto& t : threads) {
418 EXPECT_EQ(numFillObjects * numThreads * numReps, gDestroyed);
422 // Yes, threads and fork don't mix
423 // (http://cppwisdom.quora.com/Why-threads-and-fork-dont-mix) but if you're
424 // stupid or desperate enough to try, we shouldn't stand in your way.
// HoldsOne: trivial value holder whose value_ is always initialised to 1.
428 HoldsOne() : value_(1) { }
429 // Do an actual access to catch the buggy case where this == nullptr
430 int value() const { return value_; }
// Dedicated tag type for the file-level thread-local `ptr` below.
435 struct HoldsOneTag {};
437 ThreadLocal<HoldsOne, HoldsOneTag> ptr;
// Visits the HoldsOne of every thread through accessAllThreads().
// NOTE(review): the Fork test compares totalValue() with the number of live
// threads, so this loop presumably sums value() over them -- confirm.
441 for (auto& p : ptr.accessAllThreads()) {
// Compiled only when FOLLY_HAVE_PTHREAD_ATFORK is defined.
// Checks thread-local bookkeeping across fork(): totalValue() is 1 with only
// the main thread, 2 once a second thread has touched `ptr`, still 2 in the
// parent after the child has been reaped, and 1 again at the end.
// The second thread is started and stopped through the started/stopped flags
// and their condition variables, used with `mutex`.
// NOTE(review): the fork() call that produces `pid` and the child's exit code
// path are not among the lines shown here.
449 #ifdef FOLLY_HAVE_PTHREAD_ATFORK
450 TEST(ThreadLocal, Fork) {
451 EXPECT_EQ(1, ptr->value()); // ensure created
452 EXPECT_EQ(1, totalValue());
453 // Spawn a new thread
456 bool started = false;
457 std::condition_variable startedCond;
458 bool stopped = false;
459 std::condition_variable stoppedCond;
461 std::thread t([&] () {
462 EXPECT_EQ(1, ptr->value()); // ensure created
464 std::unique_lock<std::mutex> lock(mutex);
466 startedCond.notify_all();
469 std::unique_lock<std::mutex> lock(mutex);
471 stoppedCond.wait(lock);
477 std::unique_lock<std::mutex> lock(mutex);
479 startedCond.wait(lock);
483 EXPECT_EQ(2, totalValue());
488 int v = totalValue();
490 // exit successfully if v == 1 (one thread)
491 // diagnostic error code otherwise :)
// Parent side: reap the child and require a normal exit with status 0.
497 } else if (pid > 0) {
500 EXPECT_EQ(pid, waitpid(pid, &status, 0));
501 EXPECT_TRUE(WIFEXITED(status));
502 EXPECT_EQ(0, WEXITSTATUS(status));
504 EXPECT_TRUE(false) << "fork failed";
507 EXPECT_EQ(2, totalValue());
510 std::unique_lock<std::mutex> lock(mutex);
512 stoppedCond.notify_all();
517 EXPECT_EQ(1, totalValue());
// Separate tag so this test's thread-locals do not share state with `ptr`.
521 struct HoldsOneTag2 {};
// Regression test for the hang described in the comment below: the tag is
// first touched from a helper thread only, then the process forks and a second
// ThreadLocal with the same tag (`q`) is declared. The parent requires the
// child to exit normally with status 0.
// NOTE(review): the fork() call, the join of `t` and the child's exit are not
// among the lines shown here.
523 TEST(ThreadLocal, Fork2) {
524 // A thread-local tag that was used in the parent from a *different* thread
525 // (but not the forking thread) would cause the child to hang in a
526 // ThreadLocalPtr's object destructor. Yeah.
527 ThreadLocal<HoldsOne, HoldsOneTag2> p;
529 // use tag in different thread
530 std::thread t([&p] { p.get(); });
536 ThreadLocal<HoldsOne, HoldsOneTag2> q;
540 } else if (pid > 0) {
542 EXPECT_EQ(pid, waitpid(pid, &status, 0));
543 EXPECT_TRUE(WIFEXITED(status));
544 EXPECT_EQ(0, WEXITSTATUS(status));
546 EXPECT_TRUE(false) << "fork failed";
550 // clang is unable to compile this code unless in c++14 mode.
551 #if __cplusplus >= 201402L
553 // This will fail to compile unless ThreadLocal{Ptr} has a constexpr
554 // default constructor. This ensures that ThreadLocal is safe to use in
555 // static constructors without worrying about initialization order
// Compile-time-only check: the class is never exercised at run time; having a
// constexpr constructor with a ThreadLocalPtr member is the whole test.
556 class ConstexprThreadLocalCompile {
558 ThreadLocalPtr<int> b_;
560 constexpr ConstexprThreadLocalCompile() {}
565 // Simple reference implementation using pthread_getspecific
// Baseline for the benchmarks: a thin wrapper over a raw pthread key.
567 class PThreadGetSpecific {
// Creates the key and registers OnThreadExit as its destructor callback.
// NOTE(review): the return value of pthread_key_create is not checked.
569 PThreadGetSpecific() : key_(0) {
570 pthread_key_create(&key_, OnThreadExit);
// Reads the calling thread's slot and casts it back to T*.
574 return static_cast<T*>(pthread_getspecific(key_));
// Stores `t` in the calling thread's slot.
579 pthread_setspecific(key_, t);
// Key destructor callback: deletes the object that was stored in the slot.
581 static void OnThreadExit(void* obj) {
582 delete static_cast<T*>(obj);
// Benchmark setup. --numThreads (default 8) controls how many threads each
// benchmark spawns. The backslash-continued lines below belong to a macro
// (presumably REG(var), whose #define line is not shown here) that declares a
// benchmark named BM_mt_<var>: `iters` is divided evenly (integer division)
// among FLAGS_numThreads threads, each of which resets `var` to a new int(0)
// and then runs itersPerThread iterations of the loop body.
// The subjects visible here are a folly ThreadLocalPtr<int>, the raw-pthread
// PThreadGetSpecific<int> baseline and boost::thread_specific_ptr<int>.
588 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
591 BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) { \
592 const int itersPerThread = iters / FLAGS_numThreads; \
593 std::vector<std::thread> threads; \
594 for (int i = 0; i < FLAGS_numThreads; ++i) { \
595 threads.push_back(std::thread([&]() { \
596 var.reset(new int(0)); \
597 for (int i = 0; i < itersPerThread; ++i) { \
602 for (auto& t : threads) { \
607 ThreadLocalPtr<int> tlp;
609 PThreadGetSpecific<int> pthread_get_specific;
610 REG(pthread_get_specific);
611 boost::thread_specific_ptr<int> boost_tsp;
613 BENCHMARK_DRAW_LINE();
// Entry point: initialises googletest and gflags, lowers/raises the benchmark
// iteration cap only if the user did not set it, optionally runs the
// benchmarks, and returns the result of the unit tests.
615 int main(int argc, char** argv) {
616 testing::InitGoogleTest(&argc, argv);
617 gflags::ParseCommandLineFlags(&argc, &argv, true);
// SET_FLAG_IF_DEFAULT: bm_max_iters becomes 100000000 only when it still has
// its default value, so an explicit command-line setting wins.
618 gflags::SetCommandLineOptionWithMode(
619 "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT
// Benchmarks run only when --benchmark is given.
621 if (FLAGS_benchmark) {
622 folly::runBenchmarks();
624 return RUN_ALL_TESTS();
628 Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches
630 Benchmark Iters Total t t/iter iter/sec
631 ------------------------------------------------------------------------------
632 * BM_mt_tlp 100000000 39.88 ms 398.8 ps 2.335 G
633 +5.91% BM_mt_pthread_get_specific 100000000 42.23 ms 422.3 ps 2.205 G
634 + 295% BM_mt_boost_tsp 100000000 157.8 ms 1.578 ns 604.5 M
635 ------------------------------------------------------------------------------