folly/test/ThreadLocalTest.cpp

   1 /*
   2  * Copyright 2015 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <folly/ThreadLocal.h>
  18
  19 #include <dlfcn.h>
  20 #include <sys/types.h>
  21 #include <sys/wait.h>
  22 #include <unistd.h>
  23
  24 #include <array>
  25 #include <atomic>
  26 #include <chrono>
  27 #include <condition_variable>
  28 #include <limits.h>
  29 #include <map>
  30 #include <mutex>
  31 #include <set>
  32 #include <thread>
  33 #include <unordered_map>
  34
  35 #include <boost/thread/tss.hpp>
  36 #include <gflags/gflags.h>
  37 #include <glog/logging.h>
  38 #include <gtest/gtest.h>
  39
  40 #include <folly/Benchmark.h>
  41 #include <folly/Baton.h>
  42 #include <folly/experimental/io/FsUtil.h>
  43
  44 using namespace folly;
  45
  46 struct Widget {
  47   static int totalVal_;
  48   int val_;
  49   ~Widget() {
  50     totalVal_ += val_;
  51   }
  52
  53   static void customDeleter(Widget* w, TLPDestructionMode mode) {
  54     totalVal_ += (mode == TLPDestructionMode::ALL_THREADS) * 1000;
  55     delete w;
  56   }
  57 };
  58 int Widget::totalVal_ = 0;
  59
  60 TEST(ThreadLocalPtr, BasicDestructor) {
  61   Widget::totalVal_ = 0;
  62   ThreadLocalPtr<Widget> w;
  63   std::thread([&w]() {
  64       w.reset(new Widget());
  65       w.get()->val_ += 10;
  66     }).join();
  67   EXPECT_EQ(10, Widget::totalVal_);
  68 }
  69
  70 TEST(ThreadLocalPtr, CustomDeleter1) {
  71   Widget::totalVal_ = 0;
  72   {
  73     ThreadLocalPtr<Widget> w;
  74     std::thread([&w]() {
  75         w.reset(new Widget(), Widget::customDeleter);
  76         w.get()->val_ += 10;
  77       }).join();
  78     EXPECT_EQ(10, Widget::totalVal_);
  79   }
  80   EXPECT_EQ(10, Widget::totalVal_);
  81 }
  82
  83 TEST(ThreadLocalPtr, resetNull) {
  84   ThreadLocalPtr<int> tl;
  85   EXPECT_FALSE(tl);
  86   tl.reset(new int(4));
  87   EXPECT_TRUE(static_cast<bool>(tl));
  88   EXPECT_EQ(*tl.get(), 4);
  89   tl.reset();
  90   EXPECT_FALSE(tl);
  91 }
  92
  93 TEST(ThreadLocalPtr, TestRelease) {
  94   Widget::totalVal_ = 0;
  95   ThreadLocalPtr<Widget> w;
  96   std::unique_ptr<Widget> wPtr;
  97   std::thread([&w, &wPtr]() {
  98       w.reset(new Widget());
  99       w.get()->val_ += 10;
 100
 101       wPtr.reset(w.release());
 102     }).join();
 103   EXPECT_EQ(0, Widget::totalVal_);
 104   wPtr.reset();
 105   EXPECT_EQ(10, Widget::totalVal_);
 106 }
 107
 108 TEST(ThreadLocalPtr, CreateOnThreadExit) {
 109   Widget::totalVal_ = 0;
 110   ThreadLocal<Widget> w;
 111   ThreadLocalPtr<int> tl;
 112
 113   std::thread([&] {
 114       tl.reset(new int(1), [&] (int* ptr, TLPDestructionMode mode) {
 115         delete ptr;
 116         // This test ensures Widgets allocated here are not leaked.
 117         ++w.get()->val_;
 118         ThreadLocal<Widget> wl;
 119         ++wl.get()->val_;
 120       });
 121     }).join();
 122   EXPECT_EQ(2, Widget::totalVal_);
 123 }
 124
 125 // Test deleting the ThreadLocalPtr object
 126 TEST(ThreadLocalPtr, CustomDeleter2) {
 127   Widget::totalVal_ = 0;
 128   std::thread t;
 129   std::mutex mutex;
 130   std::condition_variable cv;
 131   enum class State {
 132     START,
 133     DONE,
 134     EXIT
 135   };
 136   State state = State::START;
 137   {
 138     ThreadLocalPtr<Widget> w;
 139     t = std::thread([&]() {
 140         w.reset(new Widget(), Widget::customDeleter);
 141         w.get()->val_ += 10;
 142
 143         // Notify main thread that we're done
 144         {
 145           std::unique_lock<std::mutex> lock(mutex);
 146           state = State::DONE;
 147           cv.notify_all();
 148         }
 149
 150         // Wait for main thread to allow us to exit
 151         {
 152           std::unique_lock<std::mutex> lock(mutex);
 153           while (state != State::EXIT) {
 154             cv.wait(lock);
 155           }
 156         }
 157     });
 158
 159     // Wait for main thread to start (and set w.get()->val_)
 160     {
 161       std::unique_lock<std::mutex> lock(mutex);
 162       while (state != State::DONE) {
 163         cv.wait(lock);
 164       }
 165     }
 166
 167     // Thread started but hasn't exited yet
 168     EXPECT_EQ(0, Widget::totalVal_);
 169
 170     // Destroy ThreadLocalPtr<Widget> (by letting it go out of scope)
 171   }
 172
 173   EXPECT_EQ(1010, Widget::totalVal_);
 174
 175   // Allow thread to exit
 176   {
 177     std::unique_lock<std::mutex> lock(mutex);
 178     state = State::EXIT;
 179     cv.notify_all();
 180   }
 181   t.join();
 182
 183   EXPECT_EQ(1010, Widget::totalVal_);
 184 }
 185
 186 TEST(ThreadLocal, BasicDestructor) {
 187   Widget::totalVal_ = 0;
 188   ThreadLocal<Widget> w;
 189   std::thread([&w]() { w->val_ += 10; }).join();
 190   EXPECT_EQ(10, Widget::totalVal_);
 191 }
 192
 193 TEST(ThreadLocal, SimpleRepeatDestructor) {
 194   Widget::totalVal_ = 0;
 195   {
 196     ThreadLocal<Widget> w;
 197     w->val_ += 10;
 198   }
 199   {
 200     ThreadLocal<Widget> w;
 201     w->val_ += 10;
 202   }
 203   EXPECT_EQ(20, Widget::totalVal_);
 204 }
 205
 206 TEST(ThreadLocal, InterleavedDestructors) {
 207   Widget::totalVal_ = 0;
 208   std::unique_ptr<ThreadLocal<Widget>> w;
 209   int wVersion = 0;
 210   const int wVersionMax = 2;
 211   int thIter = 0;
 212   std::mutex lock;
 213   auto th = std::thread([&]() {
 214     int wVersionPrev = 0;
 215     while (true) {
 216       while (true) {
 217         std::lock_guard<std::mutex> g(lock);
 218         if (wVersion > wVersionMax) {
 219           return;
 220         }
 221         if (wVersion > wVersionPrev) {
 222           // We have a new version of w, so it should be initialized to zero
 223           EXPECT_EQ((*w)->val_, 0);
 224           break;
 225         }
 226       }
 227       std::lock_guard<std::mutex> g(lock);
 228       wVersionPrev = wVersion;
 229       (*w)->val_ += 10;
 230       ++thIter;
 231     }
 232   });
 233   FOR_EACH_RANGE(i, 0, wVersionMax) {
 234     int thIterPrev = 0;
 235     {
 236       std::lock_guard<std::mutex> g(lock);
 237       thIterPrev = thIter;
 238       w.reset(new ThreadLocal<Widget>());
 239       ++wVersion;
 240     }
 241     while (true) {
 242       std::lock_guard<std::mutex> g(lock);
 243       if (thIter > thIterPrev) {
 244         break;
 245       }
 246     }
 247   }
 248   {
 249     std::lock_guard<std::mutex> g(lock);
 250     wVersion = wVersionMax + 1;
 251   }
 252   th.join();
 253   EXPECT_EQ(wVersionMax * 10, Widget::totalVal_);
 254 }
 255
 256 class SimpleThreadCachedInt {
 257
 258   class NewTag;
 259   ThreadLocal<int,NewTag> val_;
 260
 261  public:
 262   void add(int val) {
 263     *val_ += val;
 264   }
 265
 266   int read() {
 267     int ret = 0;
 268     for (const auto& i : val_.accessAllThreads()) {
 269       ret += i;
 270     }
 271     return ret;
 272   }
 273 };
 274
 275 TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
 276   const int kNumThreads = 10;
 277   SimpleThreadCachedInt stci;
 278   std::atomic<bool> run(true);
 279   std::atomic<int> totalAtomic(0);
 280   std::vector<std::thread> threads;
 281   for (int i = 0; i < kNumThreads; ++i) {
 282     threads.push_back(std::thread([&,i]() {
 283       stci.add(1);
 284       totalAtomic.fetch_add(1);
 285       while (run.load()) { usleep(100); }
 286     }));
 287   }
 288   while (totalAtomic.load() != kNumThreads) { usleep(100); }
 289   EXPECT_EQ(kNumThreads, stci.read());
 290   run.store(false);
 291   for (auto& t : threads) {
 292     t.join();
 293   }
 294 }
 295
 296 TEST(ThreadLocal, resetNull) {
 297   ThreadLocal<int> tl;
 298   tl.reset(new int(4));
 299   EXPECT_EQ(*tl.get(), 4);
 300   tl.reset();
 301   EXPECT_EQ(*tl.get(), 0);
 302   tl.reset(new int(5));
 303   EXPECT_EQ(*tl.get(), 5);
 304 }
 305
 306 namespace {
 307 struct Tag {};
 308
 309 struct Foo {
 310   folly::ThreadLocal<int, Tag> tl;
 311 };
 312 }  // namespace
 313
 314 TEST(ThreadLocal, Movable1) {
 315   Foo a;
 316   Foo b;
 317   EXPECT_TRUE(a.tl.get() != b.tl.get());
 318
 319   a = Foo();
 320   b = Foo();
 321   EXPECT_TRUE(a.tl.get() != b.tl.get());
 322 }
 323
 324 TEST(ThreadLocal, Movable2) {
 325   std::map<int, Foo> map;
 326
 327   map[42];
 328   map[10];
 329   map[23];
 330   map[100];
 331
 332   std::set<void*> tls;
 333   for (auto& m : map) {
 334     tls.insert(m.second.tl.get());
 335   }
 336
 337   // Make sure that we have 4 different instances of *tl
 338   EXPECT_EQ(4, tls.size());
 339 }
 340
 341 namespace {
 342
 343 constexpr size_t kFillObjectSize = 300;
 344
 345 std::atomic<uint64_t> gDestroyed;
 346
 347 /**
 348  * Fill a chunk of memory with a unique-ish pattern that includes the thread id
 349  * (so deleting one of these from another thread would cause a failure)
 350  *
 351  * Verify it explicitly and on destruction.
 352  */
 353 class FillObject {
 354  public:
 355   explicit FillObject(uint64_t idx) : idx_(idx) {
 356     uint64_t v = val();
 357     for (size_t i = 0; i < kFillObjectSize; ++i) {
 358       data_[i] = v;
 359     }
 360   }
 361
 362   void check() {
 363     uint64_t v = val();
 364     for (size_t i = 0; i < kFillObjectSize; ++i) {
 365       CHECK_EQ(v, data_[i]);
 366     }
 367   }
 368
 369   ~FillObject() {
 370     ++gDestroyed;
 371   }
 372
 373  private:
 374   uint64_t val() const {
 375     return (idx_ << 40) | uint64_t(pthread_self());
 376   }
 377
 378   uint64_t idx_;
 379   uint64_t data_[kFillObjectSize];
 380 };
 381
 382 }  // namespace
 383
 384 #if FOLLY_HAVE_STD_THIS_THREAD_SLEEP_FOR
 385 TEST(ThreadLocal, Stress) {
 386   constexpr size_t numFillObjects = 250;
 387   std::array<ThreadLocalPtr<FillObject>, numFillObjects> objects;
 388
 389   constexpr size_t numThreads = 32;
 390   constexpr size_t numReps = 20;
 391
 392   std::vector<std::thread> threads;
 393   threads.reserve(numThreads);
 394
 395   for (size_t i = 0; i < numThreads; ++i) {
 396     threads.emplace_back([&objects] {
 397       for (size_t rep = 0; rep < numReps; ++rep) {
 398         for (size_t i = 0; i < objects.size(); ++i) {
 399           objects[i].reset(new FillObject(rep * objects.size() + i));
 400           std::this_thread::sleep_for(std::chrono::microseconds(100));
 401         }
 402         for (size_t i = 0; i < objects.size(); ++i) {
 403           objects[i]->check();
 404         }
 405       }
 406     });
 407   }
 408
 409   for (auto& t : threads) {
 410     t.join();
 411   }
 412
 413   EXPECT_EQ(numFillObjects * numThreads * numReps, gDestroyed);
 414 }
 415 #endif
 416
 417 // Yes, threads and fork don't mix
 418 // (http://cppwisdom.quora.com/Why-threads-and-fork-dont-mix) but if you're
 419 // stupid or desperate enough to try, we shouldn't stand in your way.
 420 namespace {
 421 class HoldsOne {
 422  public:
 423   HoldsOne() : value_(1) { }
 424   // Do an actual access to catch the buggy case where this == nullptr
 425   int value() const { return value_; }
 426  private:
 427   int value_;
 428 };
 429
 430 struct HoldsOneTag {};
 431
 432 ThreadLocal<HoldsOne, HoldsOneTag> ptr;
 433
 434 int totalValue() {
 435   int value = 0;
 436   for (auto& p : ptr.accessAllThreads()) {
 437     value += p.value();
 438   }
 439   return value;
 440 }
 441
 442 }  // namespace
 443
 444 #ifdef FOLLY_HAVE_PTHREAD_ATFORK
 445 TEST(ThreadLocal, Fork) {
 446   EXPECT_EQ(1, ptr->value());  // ensure created
 447   EXPECT_EQ(1, totalValue());
 448   // Spawn a new thread
 449
 450   std::mutex mutex;
 451   bool started = false;
 452   std::condition_variable startedCond;
 453   bool stopped = false;
 454   std::condition_variable stoppedCond;
 455
 456   std::thread t([&] () {
 457     EXPECT_EQ(1, ptr->value());  // ensure created
 458     {
 459       std::unique_lock<std::mutex> lock(mutex);
 460       started = true;
 461       startedCond.notify_all();
 462     }
 463     {
 464       std::unique_lock<std::mutex> lock(mutex);
 465       while (!stopped) {
 466         stoppedCond.wait(lock);
 467       }
 468     }
 469   });
 470
 471   {
 472     std::unique_lock<std::mutex> lock(mutex);
 473     while (!started) {
 474       startedCond.wait(lock);
 475     }
 476   }
 477
 478   EXPECT_EQ(2, totalValue());
 479
 480   pid_t pid = fork();
 481   if (pid == 0) {
 482     // in child
 483     int v = totalValue();
 484
 485     // exit successfully if v == 1 (one thread)
 486     // diagnostic error code otherwise :)
 487     switch (v) {
 488     case 1: _exit(0);
 489     case 0: _exit(1);
 490     }
 491     _exit(2);
 492   } else if (pid > 0) {
 493     // in parent
 494     int status;
 495     EXPECT_EQ(pid, waitpid(pid, &status, 0));
 496     EXPECT_TRUE(WIFEXITED(status));
 497     EXPECT_EQ(0, WEXITSTATUS(status));
 498   } else {
 499     EXPECT_TRUE(false) << "fork failed";
 500   }
 501
 502   EXPECT_EQ(2, totalValue());
 503
 504   {
 505     std::unique_lock<std::mutex> lock(mutex);
 506     stopped = true;
 507     stoppedCond.notify_all();
 508   }
 509
 510   t.join();
 511
 512   EXPECT_EQ(1, totalValue());
 513 }
 514 #endif
 515
 516 struct HoldsOneTag2 {};
 517
 518 TEST(ThreadLocal, Fork2) {
 519   // A thread-local tag that was used in the parent from a *different* thread
 520   // (but not the forking thread) would cause the child to hang in a
 521   // ThreadLocalPtr's object destructor. Yeah.
 522   ThreadLocal<HoldsOne, HoldsOneTag2> p;
 523   {
 524     // use tag in different thread
 525     std::thread t([&p] { p.get(); });
 526     t.join();
 527   }
 528   pid_t pid = fork();
 529   if (pid == 0) {
 530     {
 531       ThreadLocal<HoldsOne, HoldsOneTag2> q;
 532       q.get();
 533     }
 534     _exit(0);
 535   } else if (pid > 0) {
 536     int status;
 537     EXPECT_EQ(pid, waitpid(pid, &status, 0));
 538     EXPECT_TRUE(WIFEXITED(status));
 539     EXPECT_EQ(0, WEXITSTATUS(status));
 540   } else {
 541     EXPECT_TRUE(false) << "fork failed";
 542   }
 543 }
 544
 545 TEST(ThreadLocal, SharedLibrary) {
 546   auto exe = fs::executable_path();
 547   auto lib = exe.parent_path() / "lib_thread_local_test.so";
 548   auto handle = dlopen(lib.string().c_str(), RTLD_LAZY);
 549   EXPECT_NE(nullptr, handle);
 550
 551   typedef void (*useA_t)();
 552   dlerror();
 553   useA_t useA = (useA_t) dlsym(handle, "useA");
 554
 555   const char *dlsym_error = dlerror();
 556   EXPECT_EQ(nullptr, dlsym_error);
 557
 558   useA();
 559
 560   folly::Baton<> b11, b12, b21, b22;
 561
 562   std::thread t1([&]() {
 563       useA();
 564       b11.post();
 565       b12.wait();
 566     });
 567
 568   std::thread t2([&]() {
 569       useA();
 570       b21.post();
 571       b22.wait();
 572     });
 573
 574   b11.wait();
 575   b21.wait();
 576
 577   dlclose(handle);
 578
 579   b12.post();
 580   b22.post();
 581
 582   t1.join();
 583   t2.join();
 584 }
 585
 586 namespace folly { namespace threadlocal_detail {
 587 struct PthreadKeyUnregisterTester {
 588   PthreadKeyUnregister p;
 589   constexpr PthreadKeyUnregisterTester() = default;
 590 };
 591 }}
 592
 593 TEST(ThreadLocal, UnregisterClassHasConstExprCtor) {
 594   folly::threadlocal_detail::PthreadKeyUnregisterTester x;
 595   // yep!
 596   SUCCEED();
 597 }
 598
 599 // clang is unable to compile this code unless in c++14 mode.
 600 #if __cplusplus >= 201402L
 601 namespace {
 602 // This will fail to compile unless ThreadLocal{Ptr} has a constexpr
 603 // default constructor. This ensures that ThreadLocal is safe to use in
 604 // static constructors without worrying about initialization order
 605 class ConstexprThreadLocalCompile {
 606   ThreadLocal<int> a_;
 607   ThreadLocalPtr<int> b_;
 608
 609   constexpr ConstexprThreadLocalCompile() {}
 610 };
 611 }
 612 #endif
 613
 614 // Simple reference implementation using pthread_get_specific
 615 template<typename T>
 616 class PThreadGetSpecific {
 617  public:
 618   PThreadGetSpecific() : key_(0) {
 619     pthread_key_create(&key_, OnThreadExit);
 620   }
 621
 622   T* get() const {
 623     return static_cast<T*>(pthread_getspecific(key_));
 624   }
 625
 626   void reset(T* t) {
 627     delete get();
 628     pthread_setspecific(key_, t);
 629   }
 630   static void OnThreadExit(void* obj) {
 631     delete static_cast<T*>(obj);
 632   }
 633  private:
 634   pthread_key_t key_;
 635 };
 636
 637 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
 638
 639 #define REG(var)                                                \
 640   BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) {               \
 641     const int itersPerThread = iters / FLAGS_numThreads;        \
 642     std::vector<std::thread> threads;                           \
 643     for (int i = 0; i < FLAGS_numThreads; ++i) {                \
 644       threads.push_back(std::thread([&]() {                     \
 645         var.reset(new int(0));                                  \
 646         for (int i = 0; i < itersPerThread; ++i) {              \
 647           ++(*var.get());                                       \
 648         }                                                       \
 649       }));                                                      \
 650     }                                                           \
 651     for (auto& t : threads) {                                   \
 652       t.join();                                                 \
 653     }                                                           \
 654   }
 655
 656 ThreadLocalPtr<int> tlp;
 657 REG(tlp);
 658 PThreadGetSpecific<int> pthread_get_specific;
 659 REG(pthread_get_specific);
 660 boost::thread_specific_ptr<int> boost_tsp;
 661 REG(boost_tsp);
 662 BENCHMARK_DRAW_LINE();
 663
 664 int main(int argc, char** argv) {
 665   testing::InitGoogleTest(&argc, argv);
 666   gflags::ParseCommandLineFlags(&argc, &argv, true);
 667   gflags::SetCommandLineOptionWithMode(
 668     "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT
 669   );
 670   if (FLAGS_benchmark) {
 671     folly::runBenchmarks();
 672   }
 673   return RUN_ALL_TESTS();
 674 }
 675
 676 /*
 677 Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches
 678
 679 Benchmark                               Iters   Total t    t/iter iter/sec
 680 ------------------------------------------------------------------------------
 681 *       BM_mt_tlp                   100000000  39.88 ms  398.8 ps  2.335 G
 682  +5.91% BM_mt_pthread_get_specific  100000000  42.23 ms  422.3 ps  2.205 G
 683  + 295% BM_mt_boost_tsp             100000000  157.8 ms  1.578 ns  604.5 M
 684 ------------------------------------------------------------------------------
 685 */