2 * Copyright 2012 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "folly/ThreadLocal.h"
20 #include <unordered_map>
24 #include <condition_variable>
26 #include <boost/thread/tss.hpp>
27 #include <gtest/gtest.h>
28 #include <gflags/gflags.h>
29 #include <glog/logging.h>
30 #include "folly/Benchmark.h"
32 using namespace folly;
41 static void customDeleter(Widget* w, TLPDestructionMode mode) {
42 totalVal_ += (mode == TLPDestructionMode::ALL_THREADS) * 1000;
// Out-of-class definition of Widget's static accumulator. Each test resets it
// to 0 before running and then compares it against an expected total with
// EXPECT_EQ. Widget::customDeleter additionally adds 1000 to it when invoked
// with TLPDestructionMode::ALL_THREADS.
46 int Widget::totalVal_ = 0;
48 TEST(ThreadLocalPtr, BasicDestructor) {
49 Widget::totalVal_ = 0;
50 ThreadLocalPtr<Widget> w;
52 w.reset(new Widget());
55 EXPECT_EQ(10, Widget::totalVal_);
58 TEST(ThreadLocalPtr, CustomDeleter1) {
59 Widget::totalVal_ = 0;
61 ThreadLocalPtr<Widget> w;
63 w.reset(new Widget(), Widget::customDeleter);
66 EXPECT_EQ(10, Widget::totalVal_);
68 EXPECT_EQ(10, Widget::totalVal_);
71 TEST(ThreadLocalPtr, resetNull) {
72 ThreadLocalPtr<int> tl;
75 EXPECT_TRUE(static_cast<bool>(tl));
76 EXPECT_EQ(*tl.get(), 4);
81 // Test deleting the ThreadLocalPtr object
82 TEST(ThreadLocalPtr, CustomDeleter2) {
83 Widget::totalVal_ = 0;
86 std::condition_variable cv;
92 State state = State::START;
94 ThreadLocalPtr<Widget> w;
95 t = std::thread([&]() {
96 w.reset(new Widget(), Widget::customDeleter);
99 // Notify main thread that we're done
101 std::unique_lock<std::mutex> lock(mutex);
106 // Wait for main thread to allow us to exit
108 std::unique_lock<std::mutex> lock(mutex);
109 while (state != State::EXIT) {
115 // Wait for the worker thread to start (and set w.get()->val_)
117 std::unique_lock<std::mutex> lock(mutex);
118 while (state != State::DONE) {
123 // Thread started but hasn't exited yet
124 EXPECT_EQ(0, Widget::totalVal_);
126 // Destroy ThreadLocalPtr<Widget> (by letting it go out of scope)
129 EXPECT_EQ(1010, Widget::totalVal_);
131 // Allow thread to exit
133 std::unique_lock<std::mutex> lock(mutex);
139 EXPECT_EQ(1010, Widget::totalVal_);
142 TEST(ThreadLocal, BasicDestructor) {
143 Widget::totalVal_ = 0;
144 ThreadLocal<Widget> w;
145 std::thread([&w]() { w->val_ += 10; }).join();
146 EXPECT_EQ(10, Widget::totalVal_);
149 TEST(ThreadLocal, SimpleRepeatDestructor) {
150 Widget::totalVal_ = 0;
152 ThreadLocal<Widget> w;
156 ThreadLocal<Widget> w;
159 EXPECT_EQ(20, Widget::totalVal_);
162 TEST(ThreadLocal, InterleavedDestructors) {
163 Widget::totalVal_ = 0;
164 ThreadLocal<Widget>* w = NULL;
166 const int wVersionMax = 2;
169 auto th = std::thread([&]() {
170 int wVersionPrev = 0;
173 std::lock_guard<std::mutex> g(lock);
174 if (wVersion > wVersionMax) {
177 if (wVersion > wVersionPrev) {
178 // We have a new version of w, so it should be initialized to zero
179 EXPECT_EQ((*w)->val_, 0);
183 std::lock_guard<std::mutex> g(lock);
184 wVersionPrev = wVersion;
189 FOR_EACH_RANGE(i, 0, wVersionMax) {
192 std::lock_guard<std::mutex> g(lock);
195 w = new ThreadLocal<Widget>();
199 std::lock_guard<std::mutex> g(lock);
200 if (thIter > thIterPrev) {
206 std::lock_guard<std::mutex> g(lock);
207 wVersion = wVersionMax + 1;
210 EXPECT_EQ(wVersionMax * 10, Widget::totalVal_);
213 class SimpleThreadCachedInt {
216 ThreadLocal<int,NewTag> val_;
225 for (const auto& i : val_.accessAllThreads()) {
232 TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
233 const int kNumThreads = 10;
234 SimpleThreadCachedInt stci;
235 std::atomic<bool> run(true);
236 std::atomic<int> totalAtomic(0);
237 std::vector<std::thread> threads;
238 for (int i = 0; i < kNumThreads; ++i) {
239 threads.push_back(std::thread([&,i]() {
241 totalAtomic.fetch_add(1);
242 while (run.load()) { usleep(100); }
245 while (totalAtomic.load() != kNumThreads) { usleep(100); }
246 EXPECT_EQ(kNumThreads, stci.read());
248 for (auto& t : threads) {
253 TEST(ThreadLocal, resetNull) {
255 tl.reset(new int(4));
256 EXPECT_EQ(*tl.get(), 4);
258 EXPECT_EQ(*tl.get(), 0);
259 tl.reset(new int(5));
260 EXPECT_EQ(*tl.get(), 5);
267 folly::ThreadLocal<int, Tag> tl;
271 TEST(ThreadLocal, Movable1) {
274 EXPECT_TRUE(a.tl.get() != b.tl.get());
278 EXPECT_TRUE(a.tl.get() != b.tl.get());
281 TEST(ThreadLocal, Movable2) {
282 std::map<int, Foo> map;
290 for (auto& m : map) {
291 tls.insert(m.second.tl.get());
294 // Make sure that we have 4 different instances of *tl
295 EXPECT_EQ(4, tls.size());
298 // Simple reference implementation using pthread_getspecific
300 class PThreadGetSpecific {
302 PThreadGetSpecific() : key_(0) {
303 pthread_key_create(&key_, OnThreadExit);
307 return static_cast<T*>(pthread_getspecific(key_));
312 pthread_setspecific(key_, t);
314 static void OnThreadExit(void* obj) {
315 delete static_cast<T*>(obj);
// Command-line flag (default 8) controlling the multi-threaded benchmarks:
// the benchmark body spawns FLAGS_numThreads threads and gives each one
// iters / FLAGS_numThreads iterations.
321 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
324 BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) { \
325 const int itersPerThread = iters / FLAGS_numThreads; \
326 std::vector<std::thread> threads; \
327 for (int i = 0; i < FLAGS_numThreads; ++i) { \
328 threads.push_back(std::thread([&]() { \
329 var.reset(new int(0)); \
330 for (int i = 0; i < itersPerThread; ++i) { \
335 for (auto& t : threads) { \
340 ThreadLocalPtr<int> tlp;
342 PThreadGetSpecific<int> pthread_get_specific;
343 REG(pthread_get_specific);
344 boost::thread_specific_ptr<int> boost_tsp;
346 BENCHMARK_DRAW_LINE();
348 int main(int argc, char** argv) {
349 testing::InitGoogleTest(&argc, argv);
350 google::ParseCommandLineFlags(&argc, &argv, true);
351 google::SetCommandLineOptionWithMode(
352 "bm_max_iters", "100000000", google::SET_FLAG_IF_DEFAULT
354 if (FLAGS_benchmark) {
355 folly::runBenchmarks();
357 return RUN_ALL_TESTS();
361 Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches
363 Benchmark Iters Total t t/iter iter/sec
364 ------------------------------------------------------------------------------
365 * BM_mt_tlp 100000000 39.88 ms 398.8 ps 2.335 G
366 +5.91% BM_mt_pthread_get_specific 100000000 42.23 ms 422.3 ps 2.205 G
367 + 295% BM_mt_boost_tsp 100000000 157.8 ms 1.578 ns 604.5 M
368 ------------------------------------------------------------------------------