Fix SimpleBarrier
[folly.git] / folly / test / ThreadLocalBenchmark.cpp
1 /*
2  * Copyright 2016 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
#include <folly/ThreadLocal.h>

#include <limits.h>
#include <pthread.h>
#include <sys/types.h>

#include <array>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <map>
#include <mutex>
#include <set>
#include <system_error>
#include <thread>
#include <unordered_map>
#include <vector>

#include <boost/thread/tss.hpp>
#include <glog/logging.h>

#include <folly/Benchmark.h>
#include <folly/experimental/io/FsUtil.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/Unistd.h>
39
40 using namespace folly;
41
// Simple reference implementation built directly on pthread_getspecific /
// pthread_setspecific. Each thread owns at most one heap-allocated T stored
// under key_; that object is deleted when it is replaced through reset() or
// when the owning thread exits (via OnThreadExit).
template <typename T>
class PThreadGetSpecific {
 public:
  // Creates the key and registers OnThreadExit as its per-thread destructor.
  // Throws std::system_error if the key cannot be created, instead of
  // silently continuing with a key that was never allocated.
  PThreadGetSpecific() : key_(0) {
    const int rc = pthread_key_create(&key_, OnThreadExit);
    if (rc != 0) {
      throw std::system_error(
          rc, std::generic_category(), "pthread_key_create failed");
    }
  }

  // Frees the calling thread's value, then releases the key.
  // pthread_key_delete does not invoke the key's destructor, so values still
  // held by other live threads are not freed here.
  ~PThreadGetSpecific() {
    delete get();
    pthread_key_delete(key_);
  }

  // The key is owned by exactly one object; a copy would delete it twice.
  PThreadGetSpecific(const PThreadGetSpecific&) = delete;
  PThreadGetSpecific& operator=(const PThreadGetSpecific&) = delete;

  // Returns the calling thread's value, or nullptr if none has been set.
  T* get() const { return static_cast<T*>(pthread_getspecific(key_)); }

  // Takes ownership of t as the calling thread's value and deletes the
  // previous one. Resetting to the pointer that is already stored is a no-op
  // (the original deleted it and then re-installed the dangling pointer).
  // If the new value cannot be installed, the previous value stays in place,
  // t is deleted, and std::system_error is thrown.
  void reset(T* t) {
    T* const previous = get();
    if (previous == t) {
      return;
    }
    // Install the new value before destroying the old one so the slot never
    // refers to a freed object.
    const int rc = pthread_setspecific(key_, t);
    if (rc != 0) {
      delete t;
      throw std::system_error(
          rc, std::generic_category(), "pthread_setspecific failed");
    }
    delete previous;
  }

  // Per-thread destructor registered with the key; frees the exiting
  // thread's value.
  static void OnThreadExit(void* obj) { delete static_cast<T*>(obj); }

 private:
  pthread_key_t key_;
};
59
60 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
61
62 #define REG(var)                                         \
63   BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) {        \
64     const int itersPerThread = iters / FLAGS_numThreads; \
65     std::vector<std::thread> threads;                    \
66     for (int i = 0; i < FLAGS_numThreads; ++i) {         \
67       threads.push_back(std::thread([&]() {              \
68         var.reset(new int(0));                           \
69         for (int j = 0; j < itersPerThread; ++j) {       \
70           ++(*var.get());                                \
71         }                                                \
72       }));                                               \
73     }                                                    \
74     for (auto& t : threads) {                            \
75       t.join();                                          \
76     }                                                    \
77   }
78
79 ThreadLocalPtr<int> tlp;
80 REG(tlp);
81 PThreadGetSpecific<int> pthread_get_specific;
82 REG(pthread_get_specific);
83 boost::thread_specific_ptr<int> boost_tsp;
84 REG(boost_tsp);
85 BENCHMARK_DRAW_LINE();
86
87 int main(int argc, char** argv) {
88   gflags::ParseCommandLineFlags(&argc, &argv, true);
89   gflags::SetCommandLineOptionWithMode(
90       "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT);
91   folly::runBenchmarks();
92   return 0;
93 }
94
95 /*
96 Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches
97
98 Benchmark                               Iters   Total t    t/iter iter/sec
99 ------------------------------------------------------------------------------
100 *       BM_mt_tlp                   100000000  39.88 ms  398.8 ps  2.335 G
101  +5.91% BM_mt_pthread_get_specific  100000000  42.23 ms  422.3 ps  2.205 G
102  + 295% BM_mt_boost_tsp             100000000  157.8 ms  1.578 ns  604.5 M
103 ------------------------------------------------------------------------------
104 */