folly/test/ThreadLocalTest.cpp

   1 /*
   2  * Copyright 2015 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
#include <folly/ThreadLocal.h>

#include <limits.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <array>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <map>
#include <mutex>
#include <set>
#include <system_error>
#include <thread>
#include <unordered_map>

#include <boost/thread/tss.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include <folly/Benchmark.h>
  40
  41 using namespace folly;
  42
  43 struct Widget {
  44   static int totalVal_;
  45   int val_;
  46   ~Widget() {
  47     totalVal_ += val_;
  48   }
  49
  50   static void customDeleter(Widget* w, TLPDestructionMode mode) {
  51     totalVal_ += (mode == TLPDestructionMode::ALL_THREADS) * 1000;
  52     delete w;
  53   }
  54 };
  55 int Widget::totalVal_ = 0;
  56
  57 TEST(ThreadLocalPtr, BasicDestructor) {
  58   Widget::totalVal_ = 0;
  59   ThreadLocalPtr<Widget> w;
  60   std::thread([&w]() {
  61       w.reset(new Widget());
  62       w.get()->val_ += 10;
  63     }).join();
  64   EXPECT_EQ(10, Widget::totalVal_);
  65 }
  66
  67 TEST(ThreadLocalPtr, CustomDeleter1) {
  68   Widget::totalVal_ = 0;
  69   {
  70     ThreadLocalPtr<Widget> w;
  71     std::thread([&w]() {
  72         w.reset(new Widget(), Widget::customDeleter);
  73         w.get()->val_ += 10;
  74       }).join();
  75     EXPECT_EQ(10, Widget::totalVal_);
  76   }
  77   EXPECT_EQ(10, Widget::totalVal_);
  78 }
  79
  80 TEST(ThreadLocalPtr, resetNull) {
  81   ThreadLocalPtr<int> tl;
  82   EXPECT_FALSE(tl);
  83   tl.reset(new int(4));
  84   EXPECT_TRUE(static_cast<bool>(tl));
  85   EXPECT_EQ(*tl.get(), 4);
  86   tl.reset();
  87   EXPECT_FALSE(tl);
  88 }
  89
  90 TEST(ThreadLocalPtr, TestRelease) {
  91   Widget::totalVal_ = 0;
  92   ThreadLocalPtr<Widget> w;
  93   std::unique_ptr<Widget> wPtr;
  94   std::thread([&w, &wPtr]() {
  95       w.reset(new Widget());
  96       w.get()->val_ += 10;
  97
  98       wPtr.reset(w.release());
  99     }).join();
 100   EXPECT_EQ(0, Widget::totalVal_);
 101   wPtr.reset();
 102   EXPECT_EQ(10, Widget::totalVal_);
 103 }
 104
 105 TEST(ThreadLocalPtr, CreateOnThreadExit) {
 106   Widget::totalVal_ = 0;
 107   ThreadLocal<Widget> w;
 108   ThreadLocalPtr<int> tl;
 109
 110   std::thread([&] {
 111       tl.reset(new int(1), [&] (int* ptr, TLPDestructionMode mode) {
 112         delete ptr;
 113         // This test ensures Widgets allocated here are not leaked.
 114         ++w.get()->val_;
 115         ThreadLocal<Widget> wl;
 116         ++wl.get()->val_;
 117       });
 118     }).join();
 119   EXPECT_EQ(2, Widget::totalVal_);
 120 }
 121
 122 // Test deleting the ThreadLocalPtr object
 123 TEST(ThreadLocalPtr, CustomDeleter2) {
 124   Widget::totalVal_ = 0;
 125   std::thread t;
 126   std::mutex mutex;
 127   std::condition_variable cv;
 128   enum class State {
 129     START,
 130     DONE,
 131     EXIT
 132   };
 133   State state = State::START;
 134   {
 135     ThreadLocalPtr<Widget> w;
 136     t = std::thread([&]() {
 137         w.reset(new Widget(), Widget::customDeleter);
 138         w.get()->val_ += 10;
 139
 140         // Notify main thread that we're done
 141         {
 142           std::unique_lock<std::mutex> lock(mutex);
 143           state = State::DONE;
 144           cv.notify_all();
 145         }
 146
 147         // Wait for main thread to allow us to exit
 148         {
 149           std::unique_lock<std::mutex> lock(mutex);
 150           while (state != State::EXIT) {
 151             cv.wait(lock);
 152           }
 153         }
 154     });
 155
 156     // Wait for main thread to start (and set w.get()->val_)
 157     {
 158       std::unique_lock<std::mutex> lock(mutex);
 159       while (state != State::DONE) {
 160         cv.wait(lock);
 161       }
 162     }
 163
 164     // Thread started but hasn't exited yet
 165     EXPECT_EQ(0, Widget::totalVal_);
 166
 167     // Destroy ThreadLocalPtr<Widget> (by letting it go out of scope)
 168   }
 169
 170   EXPECT_EQ(1010, Widget::totalVal_);
 171
 172   // Allow thread to exit
 173   {
 174     std::unique_lock<std::mutex> lock(mutex);
 175     state = State::EXIT;
 176     cv.notify_all();
 177   }
 178   t.join();
 179
 180   EXPECT_EQ(1010, Widget::totalVal_);
 181 }
 182
 183 TEST(ThreadLocal, BasicDestructor) {
 184   Widget::totalVal_ = 0;
 185   ThreadLocal<Widget> w;
 186   std::thread([&w]() { w->val_ += 10; }).join();
 187   EXPECT_EQ(10, Widget::totalVal_);
 188 }
 189
 190 TEST(ThreadLocal, SimpleRepeatDestructor) {
 191   Widget::totalVal_ = 0;
 192   {
 193     ThreadLocal<Widget> w;
 194     w->val_ += 10;
 195   }
 196   {
 197     ThreadLocal<Widget> w;
 198     w->val_ += 10;
 199   }
 200   EXPECT_EQ(20, Widget::totalVal_);
 201 }
 202
 203 TEST(ThreadLocal, InterleavedDestructors) {
 204   Widget::totalVal_ = 0;
 205   std::unique_ptr<ThreadLocal<Widget>> w;
 206   int wVersion = 0;
 207   const int wVersionMax = 2;
 208   int thIter = 0;
 209   std::mutex lock;
 210   auto th = std::thread([&]() {
 211     int wVersionPrev = 0;
 212     while (true) {
 213       while (true) {
 214         std::lock_guard<std::mutex> g(lock);
 215         if (wVersion > wVersionMax) {
 216           return;
 217         }
 218         if (wVersion > wVersionPrev) {
 219           // We have a new version of w, so it should be initialized to zero
 220           EXPECT_EQ((*w)->val_, 0);
 221           break;
 222         }
 223       }
 224       std::lock_guard<std::mutex> g(lock);
 225       wVersionPrev = wVersion;
 226       (*w)->val_ += 10;
 227       ++thIter;
 228     }
 229   });
 230   FOR_EACH_RANGE(i, 0, wVersionMax) {
 231     int thIterPrev = 0;
 232     {
 233       std::lock_guard<std::mutex> g(lock);
 234       thIterPrev = thIter;
 235       w.reset(new ThreadLocal<Widget>());
 236       ++wVersion;
 237     }
 238     while (true) {
 239       std::lock_guard<std::mutex> g(lock);
 240       if (thIter > thIterPrev) {
 241         break;
 242       }
 243     }
 244   }
 245   {
 246     std::lock_guard<std::mutex> g(lock);
 247     wVersion = wVersionMax + 1;
 248   }
 249   th.join();
 250   EXPECT_EQ(wVersionMax * 10, Widget::totalVal_);
 251 }
 252
 253 class SimpleThreadCachedInt {
 254
 255   class NewTag;
 256   ThreadLocal<int,NewTag> val_;
 257
 258  public:
 259   void add(int val) {
 260     *val_ += val;
 261   }
 262
 263   int read() {
 264     int ret = 0;
 265     for (const auto& i : val_.accessAllThreads()) {
 266       ret += i;
 267     }
 268     return ret;
 269   }
 270 };
 271
 272 TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
 273   const int kNumThreads = 10;
 274   SimpleThreadCachedInt stci;
 275   std::atomic<bool> run(true);
 276   std::atomic<int> totalAtomic(0);
 277   std::vector<std::thread> threads;
 278   for (int i = 0; i < kNumThreads; ++i) {
 279     threads.push_back(std::thread([&,i]() {
 280       stci.add(1);
 281       totalAtomic.fetch_add(1);
 282       while (run.load()) { usleep(100); }
 283     }));
 284   }
 285   while (totalAtomic.load() != kNumThreads) { usleep(100); }
 286   EXPECT_EQ(kNumThreads, stci.read());
 287   run.store(false);
 288   for (auto& t : threads) {
 289     t.join();
 290   }
 291 }
 292
 293 TEST(ThreadLocal, resetNull) {
 294   ThreadLocal<int> tl;
 295   tl.reset(new int(4));
 296   EXPECT_EQ(*tl.get(), 4);
 297   tl.reset();
 298   EXPECT_EQ(*tl.get(), 0);
 299   tl.reset(new int(5));
 300   EXPECT_EQ(*tl.get(), 5);
 301 }
 302
 303 namespace {
 304 struct Tag {};
 305
 306 struct Foo {
 307   folly::ThreadLocal<int, Tag> tl;
 308 };
 309 }  // namespace
 310
 311 TEST(ThreadLocal, Movable1) {
 312   Foo a;
 313   Foo b;
 314   EXPECT_TRUE(a.tl.get() != b.tl.get());
 315
 316   a = Foo();
 317   b = Foo();
 318   EXPECT_TRUE(a.tl.get() != b.tl.get());
 319 }
 320
 321 TEST(ThreadLocal, Movable2) {
 322   std::map<int, Foo> map;
 323
 324   map[42];
 325   map[10];
 326   map[23];
 327   map[100];
 328
 329   std::set<void*> tls;
 330   for (auto& m : map) {
 331     tls.insert(m.second.tl.get());
 332   }
 333
 334   // Make sure that we have 4 different instances of *tl
 335   EXPECT_EQ(4, tls.size());
 336 }
 337
 338 namespace {
 339
 340 constexpr size_t kFillObjectSize = 300;
 341
 342 std::atomic<uint64_t> gDestroyed;
 343
 344 /**
 345  * Fill a chunk of memory with a unique-ish pattern that includes the thread id
 346  * (so deleting one of these from another thread would cause a failure)
 347  *
 348  * Verify it explicitly and on destruction.
 349  */
 350 class FillObject {
 351  public:
 352   explicit FillObject(uint64_t idx) : idx_(idx) {
 353     uint64_t v = val();
 354     for (size_t i = 0; i < kFillObjectSize; ++i) {
 355       data_[i] = v;
 356     }
 357   }
 358
 359   void check() {
 360     uint64_t v = val();
 361     for (size_t i = 0; i < kFillObjectSize; ++i) {
 362       CHECK_EQ(v, data_[i]);
 363     }
 364   }
 365
 366   ~FillObject() {
 367     ++gDestroyed;
 368   }
 369
 370  private:
 371   uint64_t val() const {
 372     return (idx_ << 40) | uint64_t(pthread_self());
 373   }
 374
 375   uint64_t idx_;
 376   uint64_t data_[kFillObjectSize];
 377 };
 378
 379 }  // namespace
 380
 381 #if FOLLY_HAVE_STD_THIS_THREAD_SLEEP_FOR
 382 TEST(ThreadLocal, Stress) {
 383   constexpr size_t numFillObjects = 250;
 384   std::array<ThreadLocalPtr<FillObject>, numFillObjects> objects;
 385
 386   constexpr size_t numThreads = 32;
 387   constexpr size_t numReps = 20;
 388
 389   std::vector<std::thread> threads;
 390   threads.reserve(numThreads);
 391
 392   for (size_t i = 0; i < numThreads; ++i) {
 393     threads.emplace_back([&objects] {
 394       for (size_t rep = 0; rep < numReps; ++rep) {
 395         for (size_t i = 0; i < objects.size(); ++i) {
 396           objects[i].reset(new FillObject(rep * objects.size() + i));
 397           std::this_thread::sleep_for(std::chrono::microseconds(100));
 398         }
 399         for (size_t i = 0; i < objects.size(); ++i) {
 400           objects[i]->check();
 401         }
 402       }
 403     });
 404   }
 405
 406   for (auto& t : threads) {
 407     t.join();
 408   }
 409
 410   EXPECT_EQ(numFillObjects * numThreads * numReps, gDestroyed);
 411 }
 412 #endif
 413
 414 // Yes, threads and fork don't mix
 415 // (http://cppwisdom.quora.com/Why-threads-and-fork-dont-mix) but if you're
 416 // stupid or desperate enough to try, we shouldn't stand in your way.
 417 namespace {
 418 class HoldsOne {
 419  public:
 420   HoldsOne() : value_(1) { }
 421   // Do an actual access to catch the buggy case where this == nullptr
 422   int value() const { return value_; }
 423  private:
 424   int value_;
 425 };
 426
 427 struct HoldsOneTag {};
 428
 429 ThreadLocal<HoldsOne, HoldsOneTag> ptr;
 430
 431 int totalValue() {
 432   int value = 0;
 433   for (auto& p : ptr.accessAllThreads()) {
 434     value += p.value();
 435   }
 436   return value;
 437 }
 438
 439 }  // namespace
 440
 441 #ifdef FOLLY_HAVE_PTHREAD_ATFORK
 442 TEST(ThreadLocal, Fork) {
 443   EXPECT_EQ(1, ptr->value());  // ensure created
 444   EXPECT_EQ(1, totalValue());
 445   // Spawn a new thread
 446
 447   std::mutex mutex;
 448   bool started = false;
 449   std::condition_variable startedCond;
 450   bool stopped = false;
 451   std::condition_variable stoppedCond;
 452
 453   std::thread t([&] () {
 454     EXPECT_EQ(1, ptr->value());  // ensure created
 455     {
 456       std::unique_lock<std::mutex> lock(mutex);
 457       started = true;
 458       startedCond.notify_all();
 459     }
 460     {
 461       std::unique_lock<std::mutex> lock(mutex);
 462       while (!stopped) {
 463         stoppedCond.wait(lock);
 464       }
 465     }
 466   });
 467
 468   {
 469     std::unique_lock<std::mutex> lock(mutex);
 470     while (!started) {
 471       startedCond.wait(lock);
 472     }
 473   }
 474
 475   EXPECT_EQ(2, totalValue());
 476
 477   pid_t pid = fork();
 478   if (pid == 0) {
 479     // in child
 480     int v = totalValue();
 481
 482     // exit successfully if v == 1 (one thread)
 483     // diagnostic error code otherwise :)
 484     switch (v) {
 485     case 1: _exit(0);
 486     case 0: _exit(1);
 487     }
 488     _exit(2);
 489   } else if (pid > 0) {
 490     // in parent
 491     int status;
 492     EXPECT_EQ(pid, waitpid(pid, &status, 0));
 493     EXPECT_TRUE(WIFEXITED(status));
 494     EXPECT_EQ(0, WEXITSTATUS(status));
 495   } else {
 496     EXPECT_TRUE(false) << "fork failed";
 497   }
 498
 499   EXPECT_EQ(2, totalValue());
 500
 501   {
 502     std::unique_lock<std::mutex> lock(mutex);
 503     stopped = true;
 504     stoppedCond.notify_all();
 505   }
 506
 507   t.join();
 508
 509   EXPECT_EQ(1, totalValue());
 510 }
 511 #endif
 512
 513 struct HoldsOneTag2 {};
 514
 515 TEST(ThreadLocal, Fork2) {
 516   // A thread-local tag that was used in the parent from a *different* thread
 517   // (but not the forking thread) would cause the child to hang in a
 518   // ThreadLocalPtr's object destructor. Yeah.
 519   ThreadLocal<HoldsOne, HoldsOneTag2> p;
 520   {
 521     // use tag in different thread
 522     std::thread t([&p] { p.get(); });
 523     t.join();
 524   }
 525   pid_t pid = fork();
 526   if (pid == 0) {
 527     {
 528       ThreadLocal<HoldsOne, HoldsOneTag2> q;
 529       q.get();
 530     }
 531     _exit(0);
 532   } else if (pid > 0) {
 533     int status;
 534     EXPECT_EQ(pid, waitpid(pid, &status, 0));
 535     EXPECT_TRUE(WIFEXITED(status));
 536     EXPECT_EQ(0, WEXITSTATUS(status));
 537   } else {
 538     EXPECT_TRUE(false) << "fork failed";
 539   }
 540 }
 541
 542 // clang is unable to compile this code unless in c++14 mode.
 543 #if __cplusplus >= 201402L
 544 namespace {
 545 // This will fail to compile unless ThreadLocal{Ptr} has a constexpr
 546 // default constructor. This ensures that ThreadLocal is safe to use in
 547 // static constructors without worrying about initialization order
 548 class ConstexprThreadLocalCompile {
 549   ThreadLocal<int> a_;
 550   ThreadLocalPtr<int> b_;
 551
 552   constexpr ConstexprThreadLocalCompile() {}
 553 };
 554 }
 555 #endif
 556
 557 // Simple reference implementation using pthread_get_specific
 558 template<typename T>
 559 class PThreadGetSpecific {
 560  public:
 561   PThreadGetSpecific() : key_(0) {
 562     pthread_key_create(&key_, OnThreadExit);
 563   }
 564
 565   T* get() const {
 566     return static_cast<T*>(pthread_getspecific(key_));
 567   }
 568
 569   void reset(T* t) {
 570     delete get();
 571     pthread_setspecific(key_, t);
 572   }
 573   static void OnThreadExit(void* obj) {
 574     delete static_cast<T*>(obj);
 575   }
 576  private:
 577   pthread_key_t key_;
 578 };
 579
 580 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
 581
 582 #define REG(var)                                                \
 583   BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) {               \
 584     const int itersPerThread = iters / FLAGS_numThreads;        \
 585     std::vector<std::thread> threads;                           \
 586     for (int i = 0; i < FLAGS_numThreads; ++i) {                \
 587       threads.push_back(std::thread([&]() {                     \
 588         var.reset(new int(0));                                  \
 589         for (int i = 0; i < itersPerThread; ++i) {              \
 590           ++(*var.get());                                       \
 591         }                                                       \
 592       }));                                                      \
 593     }                                                           \
 594     for (auto& t : threads) {                                   \
 595       t.join();                                                 \
 596     }                                                           \
 597   }
 598
 599 ThreadLocalPtr<int> tlp;
 600 REG(tlp);
 601 PThreadGetSpecific<int> pthread_get_specific;
 602 REG(pthread_get_specific);
 603 boost::thread_specific_ptr<int> boost_tsp;
 604 REG(boost_tsp);
 605 BENCHMARK_DRAW_LINE();
 606
 607 int main(int argc, char** argv) {
 608   testing::InitGoogleTest(&argc, argv);
 609   gflags::ParseCommandLineFlags(&argc, &argv, true);
 610   gflags::SetCommandLineOptionWithMode(
 611     "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT
 612   );
 613   if (FLAGS_benchmark) {
 614     folly::runBenchmarks();
 615   }
 616   return RUN_ALL_TESTS();
 617 }
 618
 619 /*
 620 Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches
 621
 622 Benchmark                               Iters   Total t    t/iter iter/sec
 623 ------------------------------------------------------------------------------
 624 *       BM_mt_tlp                   100000000  39.88 ms  398.8 ps  2.335 G
 625  +5.91% BM_mt_pthread_get_specific  100000000  42.23 ms  422.3 ps  2.205 G
 626  + 295% BM_mt_boost_tsp             100000000  157.8 ms  1.578 ns  604.5 M
 627 ------------------------------------------------------------------------------
 628 */