Futex::futexWait returns FutexResult
[folly.git] / folly / test / ThreadLocalBenchmark.cpp
1 /*
2  * Copyright 2017 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
#include <folly/ThreadLocal.h>

#include <pthread.h>
#include <sys/types.h>

#include <array>
#include <atomic>
#include <condition_variable>
#include <map>
#include <mutex>
#include <set>
#include <system_error>
#include <thread>
#include <vector>

#include <boost/thread/tss.hpp>
#include <glog/logging.h>

#include <folly/Benchmark.h>
#include <folly/experimental/io/FsUtil.h>
#include <folly/portability/GFlags.h>
35
36 using namespace folly;
37
// Simple reference implementation built directly on the POSIX
// thread-specific-data API (pthread_key_create / pthread_getspecific /
// pthread_setspecific). It offers the get()/reset() pair that the benchmarks
// in this file call on every holder type.
//
// Ownership: each thread owns the T it installed with reset(). That object is
// deleted when it is replaced by a later reset(), when the installing thread
// exits (via the key's destructor callback), or -- for the destroying thread
// only -- when this holder is destroyed. Values still installed in *other*
// live threads at destruction time are not freed, so destroy the holder only
// after those threads have finished.
template <typename T>
class PThreadGetSpecific {
 public:
  // Allocates the key. Throws std::system_error if the system has no key
  // left; continuing with an unallocated key would alias another key's slot.
  PThreadGetSpecific() : key_(0) {
    const int rc = pthread_key_create(&key_, OnThreadExit);
    if (rc != 0) {
      throw std::system_error(
          rc, std::generic_category(), "pthread_key_create");
    }
  }

  // Releases the key so that short-lived holders do not exhaust the limited
  // number of keys. pthread_key_delete does not run the destructor callback,
  // so the calling thread's value is freed explicitly first.
  ~PThreadGetSpecific() {
    delete get();
    pthread_key_delete(key_);
  }

  // The holder owns its key; a copy would delete the same key twice.
  PThreadGetSpecific(const PThreadGetSpecific&) = delete;
  PThreadGetSpecific& operator=(const PThreadGetSpecific&) = delete;

  // Returns the calling thread's value, or nullptr if none is installed.
  T* get() const { return static_cast<T*>(pthread_getspecific(key_)); }

  // Installs `t` as the calling thread's value, taking ownership of it, and
  // deletes the previously installed value. Resetting to the pointer that is
  // already installed is a no-op. Throws std::system_error if the slot cannot
  // be updated; in that case `t` is deleted and the old value stays in place.
  void reset(T* t = nullptr) {
    T* const previous = get();
    if (previous == t) {
      return;
    }
    const int rc = pthread_setspecific(key_, t);
    if (rc != 0) {
      delete t;
      throw std::system_error(
          rc, std::generic_category(), "pthread_setspecific");
    }
    // Free the old value only once the slot no longer refers to it.
    delete previous;
  }

  // Key destructor callback: frees a thread's value when that thread exits.
  static void OnThreadExit(void* obj) { delete static_cast<T*>(obj); }

 private:
  pthread_key_t key_;
};
55
56 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
57
58 #define REG(var)                                         \
59   BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) {        \
60     const int itersPerThread = iters / FLAGS_numThreads; \
61     std::vector<std::thread> threads;                    \
62     for (int i = 0; i < FLAGS_numThreads; ++i) {         \
63       threads.push_back(std::thread([&]() {              \
64         var.reset(new int(0));                           \
65         for (int j = 0; j < itersPerThread; ++j) {       \
66           ++(*var.get());                                \
67         }                                                \
68       }));                                               \
69     }                                                    \
70     for (auto& t : threads) {                            \
71       t.join();                                          \
72     }                                                    \
73   }
74
75 ThreadLocalPtr<int> tlp;
76 REG(tlp);
77 PThreadGetSpecific<int> pthread_get_specific;
78 REG(pthread_get_specific);
79 boost::thread_specific_ptr<int> boost_tsp;
80 REG(boost_tsp);
81 BENCHMARK_DRAW_LINE();
82
// Payload type for the *_multi benchmarks: two integer fields, both starting
// at zero.
struct foo {
  int a = 0;
  int b = 0;
};
87
88 template <typename TL>
89 void run_multi(uint32_t iters) {
90   const int itersPerThread = iters / FLAGS_numThreads;
91   std::vector<std::thread> threads;
92   TL var;
93   for (int i = 0; i < FLAGS_numThreads; ++i) {
94     threads.push_back(std::thread([&]() {
95       var.reset(new foo);
96       for (int j = 0; j < itersPerThread; ++j) {
97         ++var.get()->a;
98         var.get()->b += var.get()->a;
99         --var.get()->a;
100         var.get()->b += var.get()->a;
101       }
102     }));
103   }
104   for (auto& t : threads) {
105     t.join();
106   }
107 }
108
109 BENCHMARK(BM_mt_tlp_multi, iters) {
110   run_multi<ThreadLocalPtr<foo>>(iters);
111 }
112 BENCHMARK(BM_mt_pthread_get_specific_multi, iters) {
113   run_multi<PThreadGetSpecific<foo>>(iters);
114 }
115 BENCHMARK(BM_mt_boost_tsp_multi, iters) {
116   run_multi<boost::thread_specific_ptr<foo>>(iters);
117 }
118 BENCHMARK_DRAW_LINE();
119
120 int main(int argc, char** argv) {
121   gflags::ParseCommandLineFlags(&argc, &argv, true);
122   gflags::SetCommandLineOptionWithMode(
123       "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT);
124   folly::runBenchmarks();
125   return 0;
126 }
127
128 /*
129 ./buck-out/gen/folly/test/thread_local_benchmark --bm_min_iters=10000000
130 --numThreads=1
131
132 ============================================================================
133 folly/test/ThreadLocalBenchmark.cpp             relative  time/iter  iters/s
134 ============================================================================
135 BM_mt_tlp                                                    1.92ns  520.02M
136 BM_mt_pthread_get_specific                                   2.69ns  372.15M
137 BM_mt_boost_tsp                                             11.81ns   84.67M
138 ----------------------------------------------------------------------------
139 BM_mt_tlp_multi                                              7.53ns  132.79M
140 BM_mt_pthread_get_specific_multi                            15.80ns   63.29M
141 BM_mt_boost_tsp_multi                                       71.70ns   13.95M
142 ----------------------------------------------------------------------------
143 ============================================================================
144 */