folly copyright 2015 -> copyright 2016
[folly.git] / folly / test / CacheLocalityTest.cpp
1 /*
2  * Copyright 2016 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <folly/detail/CacheLocality.h>
18
19 #include <sched.h>
20 #include <memory>
21 #include <thread>
22 #include <type_traits>
23 #include <unordered_map>
24 #include <glog/logging.h>
25 #include <gtest/gtest.h>
26 #include <folly/Benchmark.h>
27
28 using namespace folly::detail;
29
/// These are the relevant nodes from a production box's sysfs tree, keyed
/// by absolute path with the file's contents as the value.  If you think
/// this map is ugly you should see the version of this test that used a
/// real directory tree.  To reduce the chance of testing error the common
/// path prefix has deliberately not been factored out.
///
/// The map is const: it is a read-only fixture that tests only look
/// entries up in.
static const std::unordered_map<std::string, std::string> fakeSysfsTree = {
  {"/sys/devices/system/cpu/cpu0/cache/index0/shared_cpu_list", "0,17"},
  {"/sys/devices/system/cpu/cpu0/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu0/cache/index1/shared_cpu_list", "0,17"},
  {"/sys/devices/system/cpu/cpu0/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_list", "0,17"},
  {"/sys/devices/system/cpu/cpu0/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu0/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu1/cache/index0/shared_cpu_list", "1,18"},
  {"/sys/devices/system/cpu/cpu1/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu1/cache/index1/shared_cpu_list", "1,18"},
  {"/sys/devices/system/cpu/cpu1/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_list", "1,18"},
  {"/sys/devices/system/cpu/cpu1/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu1/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu1/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu2/cache/index0/shared_cpu_list", "2,19"},
  {"/sys/devices/system/cpu/cpu2/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu2/cache/index1/shared_cpu_list", "2,19"},
  {"/sys/devices/system/cpu/cpu2/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu2/cache/index2/shared_cpu_list", "2,19"},
  {"/sys/devices/system/cpu/cpu2/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu2/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu2/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu3/cache/index0/shared_cpu_list", "3,20"},
  {"/sys/devices/system/cpu/cpu3/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu3/cache/index1/shared_cpu_list", "3,20"},
  {"/sys/devices/system/cpu/cpu3/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_list", "3,20"},
  {"/sys/devices/system/cpu/cpu3/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu3/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu3/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu4/cache/index0/shared_cpu_list", "4,21"},
  {"/sys/devices/system/cpu/cpu4/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu4/cache/index1/shared_cpu_list", "4,21"},
  {"/sys/devices/system/cpu/cpu4/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu4/cache/index2/shared_cpu_list", "4,21"},
  {"/sys/devices/system/cpu/cpu4/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu4/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu4/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu5/cache/index0/shared_cpu_list", "5-6"},
  {"/sys/devices/system/cpu/cpu5/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu5/cache/index1/shared_cpu_list", "5-6"},
  {"/sys/devices/system/cpu/cpu5/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu5/cache/index2/shared_cpu_list", "5-6"},
  {"/sys/devices/system/cpu/cpu5/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu5/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu5/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu6/cache/index0/shared_cpu_list", "5-6"},
  {"/sys/devices/system/cpu/cpu6/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu6/cache/index1/shared_cpu_list", "5-6"},
  {"/sys/devices/system/cpu/cpu6/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu6/cache/index2/shared_cpu_list", "5-6"},
  {"/sys/devices/system/cpu/cpu6/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu6/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu6/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu7/cache/index0/shared_cpu_list", "7,22"},
  {"/sys/devices/system/cpu/cpu7/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu7/cache/index1/shared_cpu_list", "7,22"},
  {"/sys/devices/system/cpu/cpu7/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu7/cache/index2/shared_cpu_list", "7,22"},
  {"/sys/devices/system/cpu/cpu7/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu7/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu7/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list", "8,23"},
  {"/sys/devices/system/cpu/cpu8/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list", "8,23"},
  {"/sys/devices/system/cpu/cpu8/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list", "8,23"},
  {"/sys/devices/system/cpu/cpu8/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu8/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list", "9,24"},
  {"/sys/devices/system/cpu/cpu9/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list", "9,24"},
  {"/sys/devices/system/cpu/cpu9/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list", "9,24"},
  {"/sys/devices/system/cpu/cpu9/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu9/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu10/cache/index0/shared_cpu_list", "10,25"},
  {"/sys/devices/system/cpu/cpu10/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu10/cache/index1/shared_cpu_list", "10,25"},
  {"/sys/devices/system/cpu/cpu10/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu10/cache/index2/shared_cpu_list", "10,25"},
  {"/sys/devices/system/cpu/cpu10/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu10/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu10/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu11/cache/index0/shared_cpu_list", "11,26"},
  {"/sys/devices/system/cpu/cpu11/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu11/cache/index1/shared_cpu_list", "11,26"},
  {"/sys/devices/system/cpu/cpu11/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu11/cache/index2/shared_cpu_list", "11,26"},
  {"/sys/devices/system/cpu/cpu11/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu11/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu11/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu12/cache/index0/shared_cpu_list", "12,27"},
  {"/sys/devices/system/cpu/cpu12/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu12/cache/index1/shared_cpu_list", "12,27"},
  {"/sys/devices/system/cpu/cpu12/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu12/cache/index2/shared_cpu_list", "12,27"},
  {"/sys/devices/system/cpu/cpu12/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu12/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu12/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu13/cache/index0/shared_cpu_list", "13,28"},
  {"/sys/devices/system/cpu/cpu13/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu13/cache/index1/shared_cpu_list", "13,28"},
  {"/sys/devices/system/cpu/cpu13/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu13/cache/index2/shared_cpu_list", "13,28"},
  {"/sys/devices/system/cpu/cpu13/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu13/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu13/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu14/cache/index0/shared_cpu_list", "14,29"},
  {"/sys/devices/system/cpu/cpu14/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu14/cache/index1/shared_cpu_list", "14,29"},
  {"/sys/devices/system/cpu/cpu14/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu14/cache/index2/shared_cpu_list", "14,29"},
  {"/sys/devices/system/cpu/cpu14/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu14/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu14/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu15/cache/index0/shared_cpu_list", "15,30"},
  {"/sys/devices/system/cpu/cpu15/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu15/cache/index1/shared_cpu_list", "15,30"},
  {"/sys/devices/system/cpu/cpu15/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu15/cache/index2/shared_cpu_list", "15,30"},
  {"/sys/devices/system/cpu/cpu15/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu15/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu15/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu16/cache/index0/shared_cpu_list", "16,31"},
  {"/sys/devices/system/cpu/cpu16/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu16/cache/index1/shared_cpu_list", "16,31"},
  {"/sys/devices/system/cpu/cpu16/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu16/cache/index2/shared_cpu_list", "16,31"},
  {"/sys/devices/system/cpu/cpu16/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu16/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu16/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu17/cache/index0/shared_cpu_list", "0,17"},
  {"/sys/devices/system/cpu/cpu17/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu17/cache/index1/shared_cpu_list", "0,17"},
  {"/sys/devices/system/cpu/cpu17/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu17/cache/index2/shared_cpu_list", "0,17"},
  {"/sys/devices/system/cpu/cpu17/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu17/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu17/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu18/cache/index0/shared_cpu_list", "1,18"},
  {"/sys/devices/system/cpu/cpu18/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu18/cache/index1/shared_cpu_list", "1,18"},
  {"/sys/devices/system/cpu/cpu18/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu18/cache/index2/shared_cpu_list", "1,18"},
  {"/sys/devices/system/cpu/cpu18/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu18/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu18/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu19/cache/index0/shared_cpu_list", "2,19"},
  {"/sys/devices/system/cpu/cpu19/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu19/cache/index1/shared_cpu_list", "2,19"},
  {"/sys/devices/system/cpu/cpu19/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu19/cache/index2/shared_cpu_list", "2,19"},
  {"/sys/devices/system/cpu/cpu19/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu19/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu19/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu20/cache/index0/shared_cpu_list", "3,20"},
  {"/sys/devices/system/cpu/cpu20/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu20/cache/index1/shared_cpu_list", "3,20"},
  {"/sys/devices/system/cpu/cpu20/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu20/cache/index2/shared_cpu_list", "3,20"},
  {"/sys/devices/system/cpu/cpu20/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu20/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu20/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu21/cache/index0/shared_cpu_list", "4,21"},
  {"/sys/devices/system/cpu/cpu21/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu21/cache/index1/shared_cpu_list", "4,21"},
  {"/sys/devices/system/cpu/cpu21/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu21/cache/index2/shared_cpu_list", "4,21"},
  {"/sys/devices/system/cpu/cpu21/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu21/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu21/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu22/cache/index0/shared_cpu_list", "7,22"},
  {"/sys/devices/system/cpu/cpu22/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu22/cache/index1/shared_cpu_list", "7,22"},
  {"/sys/devices/system/cpu/cpu22/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu22/cache/index2/shared_cpu_list", "7,22"},
  {"/sys/devices/system/cpu/cpu22/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu22/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu22/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu23/cache/index0/shared_cpu_list", "8,23"},
  {"/sys/devices/system/cpu/cpu23/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu23/cache/index1/shared_cpu_list", "8,23"},
  {"/sys/devices/system/cpu/cpu23/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu23/cache/index2/shared_cpu_list", "8,23"},
  {"/sys/devices/system/cpu/cpu23/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu23/cache/index3/shared_cpu_list", "0-8,17-23"},
  {"/sys/devices/system/cpu/cpu23/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu24/cache/index0/shared_cpu_list", "9,24"},
  {"/sys/devices/system/cpu/cpu24/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu24/cache/index1/shared_cpu_list", "9,24"},
  {"/sys/devices/system/cpu/cpu24/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu24/cache/index2/shared_cpu_list", "9,24"},
  {"/sys/devices/system/cpu/cpu24/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu24/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu24/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu25/cache/index0/shared_cpu_list", "10,25"},
  {"/sys/devices/system/cpu/cpu25/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu25/cache/index1/shared_cpu_list", "10,25"},
  {"/sys/devices/system/cpu/cpu25/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu25/cache/index2/shared_cpu_list", "10,25"},
  {"/sys/devices/system/cpu/cpu25/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu25/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu25/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu26/cache/index0/shared_cpu_list", "11,26"},
  {"/sys/devices/system/cpu/cpu26/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu26/cache/index1/shared_cpu_list", "11,26"},
  {"/sys/devices/system/cpu/cpu26/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu26/cache/index2/shared_cpu_list", "11,26"},
  {"/sys/devices/system/cpu/cpu26/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu26/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu26/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu27/cache/index0/shared_cpu_list", "12,27"},
  {"/sys/devices/system/cpu/cpu27/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu27/cache/index1/shared_cpu_list", "12,27"},
  {"/sys/devices/system/cpu/cpu27/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu27/cache/index2/shared_cpu_list", "12,27"},
  {"/sys/devices/system/cpu/cpu27/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu27/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu27/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu28/cache/index0/shared_cpu_list", "13,28"},
  {"/sys/devices/system/cpu/cpu28/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu28/cache/index1/shared_cpu_list", "13,28"},
  {"/sys/devices/system/cpu/cpu28/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu28/cache/index2/shared_cpu_list", "13,28"},
  {"/sys/devices/system/cpu/cpu28/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu28/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu28/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu29/cache/index0/shared_cpu_list", "14,29"},
  {"/sys/devices/system/cpu/cpu29/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu29/cache/index1/shared_cpu_list", "14,29"},
  {"/sys/devices/system/cpu/cpu29/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu29/cache/index2/shared_cpu_list", "14,29"},
  {"/sys/devices/system/cpu/cpu29/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu29/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu29/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu30/cache/index0/shared_cpu_list", "15,30"},
  {"/sys/devices/system/cpu/cpu30/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu30/cache/index1/shared_cpu_list", "15,30"},
  {"/sys/devices/system/cpu/cpu30/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu30/cache/index2/shared_cpu_list", "15,30"},
  {"/sys/devices/system/cpu/cpu30/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu30/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu30/cache/index3/type", "Unified"},
  {"/sys/devices/system/cpu/cpu31/cache/index0/shared_cpu_list", "16,31"},
  {"/sys/devices/system/cpu/cpu31/cache/index0/type", "Data"},
  {"/sys/devices/system/cpu/cpu31/cache/index1/shared_cpu_list", "16,31"},
  {"/sys/devices/system/cpu/cpu31/cache/index1/type", "Instruction"},
  {"/sys/devices/system/cpu/cpu31/cache/index2/shared_cpu_list", "16,31"},
  {"/sys/devices/system/cpu/cpu31/cache/index2/type", "Unified"},
  {"/sys/devices/system/cpu/cpu31/cache/index3/shared_cpu_list", "9-16,24-31"},
  {"/sys/devices/system/cpu/cpu31/cache/index3/type", "Unified"}
};
292
293 /// This is the expected CacheLocality structure for fakeSysfsTree
294 static const CacheLocality nonUniformExampleLocality = {
295   32,
296   { 16, 16, 2 },
297   { 0, 2, 4, 6, 8, 10, 11, 12, 14, 16, 18, 20, 22, 24, 26, 28,
298     30, 1, 3, 5, 7, 9, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }
299 };
300
301 TEST(CacheLocality, FakeSysfs) {
302   auto parsed = CacheLocality::readFromSysfsTree([](std::string name) {
303     auto iter = fakeSysfsTree.find(name);
304     return iter == fakeSysfsTree.end() ? std::string() : iter->second;
305   });
306
307   auto& expected = nonUniformExampleLocality;
308   EXPECT_EQ(expected.numCpus, parsed.numCpus);
309   EXPECT_EQ(expected.numCachesByLevel, parsed.numCachesByLevel);
310   EXPECT_EQ(expected.localityIndexByCpu, parsed.localityIndexByCpu);
311 }
312
313 TEST(Getcpu, VdsoGetcpu) {
314   unsigned cpu;
315   Getcpu::vdsoFunc()(&cpu, nullptr, nullptr);
316
317   EXPECT_TRUE(cpu < CPU_SETSIZE);
318 }
319
320 #ifdef FOLLY_TLS
321 TEST(ThreadId, SimpleTls) {
322   unsigned cpu = 0;
323   auto rv =
324       folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
325           &cpu, nullptr, nullptr);
326   EXPECT_EQ(rv, 0);
327   EXPECT_TRUE(cpu > 0);
328   unsigned again;
329   folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
330       &again, nullptr, nullptr);
331   EXPECT_EQ(cpu, again);
332 }
333 #endif
334
335 TEST(ThreadId, SimplePthread) {
336   unsigned cpu = 0;
337   auto rv = folly::detail::FallbackGetcpu<HashingThreadId>::getcpu(
338       &cpu, nullptr, nullptr);
339   EXPECT_EQ(rv, 0);
340   EXPECT_TRUE(cpu > 0);
341   unsigned again;
342   folly::detail::FallbackGetcpu<HashingThreadId>::getcpu(
343       &again, nullptr, nullptr);
344   EXPECT_EQ(cpu, again);
345 }
346
347 static FOLLY_TLS unsigned testingCpu = 0;
348
349 static int testingGetcpu(unsigned* cpu, unsigned* node, void* /* unused */) {
350   if (cpu != nullptr) {
351     *cpu = testingCpu;
352   }
353   if (node != nullptr) {
354     *node = testingCpu;
355   }
356   return 0;
357 }
358
359 TEST(AccessSpreader, Stubbed) {
360   std::vector<std::unique_ptr<AccessSpreader<>>> spreaders(100);
361   for (size_t s = 1; s < spreaders.size(); ++s) {
362     spreaders[s].reset(new AccessSpreader<>(
363         s, nonUniformExampleLocality, &testingGetcpu));
364   }
365   std::vector<size_t> cpusInLocalityOrder = {
366       0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 6, 7, 22, 8, 23, 9, 24, 10, 25,
367       11, 26, 12, 27, 13, 28, 14, 29, 15, 30, 16, 31 };
368   for (size_t i = 0; i < 32; ++i) {
369     // extra i * 32 is to check wrapping behavior of impl
370     testingCpu = cpusInLocalityOrder[i] + i * 64;
371     for (size_t s = 1; s < spreaders.size(); ++s) {
372       EXPECT_EQ((i * s) / 32, spreaders[s]->current())
373           << "i=" << i << ", cpu=" << testingCpu << ", s=" << s;
374     }
375   }
376 }
377
378 TEST(AccessSpreader, Default) {
379   AccessSpreader<> spreader(16);
380   EXPECT_LT(spreader.current(), 16);
381 }
382
383 TEST(AccessSpreader, Shared) {
384   for (size_t s = 1; s < 200; ++s) {
385     EXPECT_LT(AccessSpreader<>::shared(s).current(), s);
386   }
387 }
388
389 TEST(AccessSpreader, Statics) {
390   LOG(INFO) << "stripeByCore.numStripes() = "
391             << AccessSpreader<>::stripeByCore.numStripes();
392   LOG(INFO) << "stripeByChip.numStripes() = "
393             << AccessSpreader<>::stripeByChip.numStripes();
394   for (size_t s = 1; s < 200; ++s) {
395     EXPECT_LT(AccessSpreader<>::current(s), s);
396   }
397 }
398
399 TEST(AccessSpreader, Wrapping) {
400   // this test won't pass unless locality.numCpus divides kMaxCpus
401   auto numCpus = 16;
402   auto locality = CacheLocality::uniform(numCpus);
403   for (size_t s = 1; s < 200; ++s) {
404     AccessSpreader<> spreader(s, locality, &testingGetcpu);
405     for (size_t c = 0; c < 400; ++c) {
406       testingCpu = c;
407       auto observed = spreader.current();
408       testingCpu = c % numCpus;
409       auto expected = spreader.current();
410       EXPECT_EQ(expected, observed)
411           << "numCpus=" << numCpus << ", s=" << s << ", c=" << c;
412     }
413   }
414 }
415
416 // Benchmarked at ~21 nanos on fbk35 (2.6) and fbk18 (3.2) kernels with
417 // a 2.2Ghz Xeon
418 // ============================================================================
419 // folly/test/CacheLocalityTest.cpp                relative  time/iter  iters/s
420 // ============================================================================
421 // LocalAccessSpreaderUse                                      20.77ns   48.16M
422 // SharedAccessSpreaderUse                                     21.95ns   45.55M
423 // AccessSpreaderConstruction                                 466.56ns    2.14M
424 // ============================================================================
425
426 BENCHMARK(LocalAccessSpreaderUse, iters) {
427   folly::BenchmarkSuspender braces;
428   AccessSpreader<> spreader(16);
429   braces.dismiss();
430
431   for (unsigned long i = 0; i < iters; ++i) {
432     auto x = spreader.current();
433     folly::doNotOptimizeAway(x);
434   }
435 }
436
437 BENCHMARK(SharedAccessSpreaderUse, iters) {
438   for (unsigned long i = 0; i < iters; ++i) {
439     auto x = AccessSpreader<>::current(16);
440     folly::doNotOptimizeAway(x);
441   }
442 }
443
444 BENCHMARK(AccessSpreaderConstruction, iters) {
445   std::aligned_storage<sizeof(AccessSpreader<>),
446                        std::alignment_of<AccessSpreader<>>::value>::type raw;
447   for (unsigned long i = 0; i < iters; ++i) {
448     auto x = new (&raw) AccessSpreader<>(16);
449     folly::doNotOptimizeAway(x);
450     x->~AccessSpreader();
451   }
452 }
453
/// Which source of cpu identity a contention benchmark run uses.
enum class SpreaderType {
  GETCPU,       // the "_getcpu" benchmark variants
  SHARED,       // the "_shared" benchmark variants
  TLS_RR,       // the "_tls_rr" variants, backed by SequentialThreadId
  PTHREAD_SELF  // the "_pthread_self" variants, backed by HashingThreadId
};
455
456 // Benchmark scores here reflect the time for 32 threads to perform an
457 // atomic increment on a dual-socket E5-2660 @ 2.2Ghz.  Surprisingly,
458 // if we don't separate the counters onto unique 128 byte stripes the
459 // 1_stripe and 2_stripe results are identical, even though the L3 is
460 // claimed to have 64 byte cache lines.
461 //
462 // _stub means there was no call to getcpu or the tls round-robin
463 // implementation, because for a single stripe the cpu doesn't matter.
464 // _getcpu refers to the vdso getcpu implementation with a locally
465 // constructed AccessSpreader.  _tls_rr refers to execution using
466 // SequentialThreadId, the fallback if the vdso getcpu isn't available.
467 // _shared refers to calling AccessSpreader<>::current(numStripes)
468 // inside the hot loop.
469 //
470 // At 16_stripe_0_work and 32_stripe_0_work there is only L1 traffic,
471 // so since the stripe selection is 21 nanos the atomic increments in
472 // the L1 is ~15 nanos.  At width 8_stripe_0_work the line is expected
473 // to ping-pong almost every operation, since the loops have the same
474 // duration.  Widths 4 and 2 have the same behavior, but each tour of the
475 // cache line is 4 and 8 cores long, respectively.  These all suggest a
476 // lower bound of 60 nanos for intra-chip handoff and increment between
477 // the L1s.
478 //
479 // With 455 nanos (1K cycles) of busywork per contended increment, the
480 // system can hide all of the latency of a tour of length 4, but not
481 // quite one of length 8.  I was a bit surprised at how much worse the
482 // non-striped version got.  It seems that the inter-chip traffic also
483 // interferes with the L1-only localWork.load().  When the local work is
484 // doubled to about 1 microsecond we see that the inter-chip contention
485 // is still very important, but subdivisions on the same chip don't matter.
486 //
487 // sudo nice -n -20
488 //   _bin/folly/test/cache_locality_test --benchmark --bm_min_iters=1000000
489 // ============================================================================
490 // folly/test/CacheLocalityTest.cpp                relative  time/iter  iters/s
491 // ============================================================================
492 // LocalAccessSpreaderUse                                      13.00ns   76.94M
493 // SharedAccessSpreaderUse                                     13.04ns   76.66M
494 // AccessSpreaderConstruction                                 366.00ns    2.73M
495 // ----------------------------------------------------------------------------
496 // contentionAtWidth(1_stripe_0_work_stub)                    891.04ns    1.12M
497 // contentionAtWidth(2_stripe_0_work_getcpu)                  403.45ns    2.48M
498 // contentionAtWidth(4_stripe_0_work_getcpu)                  198.02ns    5.05M
499 // contentionAtWidth(8_stripe_0_work_getcpu)                   90.54ns   11.04M
500 // contentionAtWidth(16_stripe_0_work_getcpu)                  31.21ns   32.04M
501 // contentionAtWidth(32_stripe_0_work_getcpu)                  29.15ns   34.31M
502 // contentionAtWidth(64_stripe_0_work_getcpu)                  32.41ns   30.86M
503 // contentionAtWidth(2_stripe_0_work_tls_rr)                  958.06ns    1.04M
504 // contentionAtWidth(4_stripe_0_work_tls_rr)                  494.31ns    2.02M
505 // contentionAtWidth(8_stripe_0_work_tls_rr)                  362.34ns    2.76M
506 // contentionAtWidth(16_stripe_0_work_tls_rr)                 231.37ns    4.32M
507 // contentionAtWidth(32_stripe_0_work_tls_rr)                 128.26ns    7.80M
508 // contentionAtWidth(64_stripe_0_work_tls_rr)                 115.08ns    8.69M
509 // contentionAtWidth(2_stripe_0_work_pthread_self)            856.63ns    1.17M
510 // contentionAtWidth(4_stripe_0_work_pthread_self)            623.43ns    1.60M
511 // contentionAtWidth(8_stripe_0_work_pthread_self)            419.69ns    2.38M
512 // contentionAtWidth(16_stripe_0_work_pthread_self            217.32ns    4.60M
513 // contentionAtWidth(32_stripe_0_work_pthread_self            157.69ns    6.34M
514 // contentionAtWidth(64_stripe_0_work_pthread_self            140.94ns    7.10M
515 // contentionAtWidth(2_stripe_0_work_shared)                  406.55ns    2.46M
516 // contentionAtWidth(4_stripe_0_work_shared)                  198.28ns    5.04M
517 // contentionAtWidth(8_stripe_0_work_shared)                   90.11ns   11.10M
518 // contentionAtWidth(16_stripe_0_work_shared)                  34.53ns   28.96M
519 // contentionAtWidth(32_stripe_0_work_shared)                  30.08ns   33.25M
520 // contentionAtWidth(64_stripe_0_work_shared)                  34.60ns   28.90M
521 // atomicIncrBaseline(local_incr_0_work)                       17.51ns   57.12M
522 // ----------------------------------------------------------------------------
523 // contentionAtWidth(1_stripe_500_work_stub)                    1.87us  534.36K
524 // contentionAtWidth(2_stripe_500_work_getcpu)                542.31ns    1.84M
525 // contentionAtWidth(4_stripe_500_work_getcpu)                409.18ns    2.44M
526 // contentionAtWidth(8_stripe_500_work_getcpu)                511.05ns    1.96M
527 // contentionAtWidth(16_stripe_500_work_getcpu)               399.14ns    2.51M
528 // contentionAtWidth(32_stripe_500_work_getcpu)               399.05ns    2.51M
529 // atomicIncrBaseline(local_incr_500_work)                    399.41ns    2.50M
530 // ----------------------------------------------------------------------------
531 // contentionAtWidth(1_stripe_1000_work_stub)                   1.90us  525.73K
532 // contentionAtWidth(2_stripe_1000_work_getcpu)               792.91ns    1.26M
533 // contentionAtWidth(4_stripe_1000_work_getcpu)               788.14ns    1.27M
534 // contentionAtWidth(8_stripe_1000_work_getcpu)               794.16ns    1.26M
535 // contentionAtWidth(16_stripe_1000_work_getcpu)              785.33ns    1.27M
536 // contentionAtWidth(32_stripe_1000_work_getcpu)              786.56ns    1.27M
537 // atomicIncrBaseline(local_incr_1000_work)                   784.69ns    1.27M
538 // ============================================================================
539 static void contentionAtWidth(size_t iters, size_t stripes, size_t work,
540                               SpreaderType spreaderType,
541                               size_t counterAlignment = 128,
542                               size_t numThreads = 32) {
543   folly::BenchmarkSuspender braces;
544
545   folly::detail::Getcpu::Func getcpuFunc = nullptr;
546
547   if (spreaderType == SpreaderType::TLS_RR) {
548     getcpuFunc =
549         folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu;
550   }
551   if (spreaderType == SpreaderType::PTHREAD_SELF) {
552     getcpuFunc = folly::detail::FallbackGetcpu<HashingThreadId>::getcpu;
553   }
554
555   AccessSpreader<> spreader(
556       stripes, CacheLocality::system<std::atomic>(), getcpuFunc);
557
558   std::atomic<size_t> ready(0);
559   std::atomic<bool> go(false);
560
561   // while in theory the cache line size is 64 bytes, experiments show
562   // that we get contention on 128 byte boundaries for Ivy Bridge.  The
563   // extra indirection adds 1 or 2 nanos
564   assert(counterAlignment >= sizeof(std::atomic<size_t>));
565   std::vector<char> raw(counterAlignment * stripes);
566
567   // if we happen to be using the tlsRoundRobin, then sequentially
568   // assigning the thread identifiers is the unlikely best-case scenario.
569   // We don't want to unfairly benefit or penalize.  Computing the exact
570   // maximum likelihood of the probability distributions is annoying, so
571   // I approximate as 2/5 of the ids that have no threads, 2/5 that have
572   // 1, 2/15 that have 2, and 1/15 that have 3.  We accomplish this by
573   // wrapping back to slot 0 when we hit 1/15 and 1/5.
574
575   std::vector<std::thread> threads;
576   while (threads.size() < numThreads) {
577     threads.push_back(std::thread([&,iters,stripes,work]() {
578       std::atomic<size_t>* counters[stripes];
579       for (size_t i = 0; i < stripes; ++i) {
580         counters[i]
581           = new (raw.data() + counterAlignment * i) std::atomic<size_t>();
582       }
583
584       spreader.current();
585       ready++;
586       while (!go.load()) {
587         sched_yield();
588       }
589       std::atomic<int> localWork(0);
590       if (spreaderType == SpreaderType::SHARED) {
591         for (size_t i = iters; i > 0; --i) {
592           ++*(counters[AccessSpreader<>::current(stripes)]);
593           for (size_t j = work; j > 0; --j) {
594             localWork.load();
595           }
596         }
597       } else {
598         for (size_t i = iters; i > 0; --i) {
599           ++*(counters[spreader.current()]);
600           for (size_t j = work; j > 0; --j) {
601             localWork.load();
602           }
603         }
604       }
605     }));
606
607     if (threads.size() == numThreads / 15 ||
608         threads.size() == numThreads / 5) {
609       // create a few dummy threads to wrap back around to 0 mod numCpus
610       for (size_t i = threads.size(); i != numThreads; ++i) {
611         std::thread([&]() {
612           spreader.current();
613         }).join();
614       }
615     }
616   }
617
618   while (ready < numThreads) {
619     sched_yield();
620   }
621   braces.dismiss();
622   go = true;
623
624   for (auto& thr : threads) {
625     thr.join();
626   }
627 }
628
629 static void atomicIncrBaseline(size_t iters, size_t work,
630                                size_t numThreads = 32) {
631   folly::BenchmarkSuspender braces;
632
633   std::atomic<bool> go(false);
634
635   std::vector<std::thread> threads;
636   while (threads.size() < numThreads) {
637     threads.push_back(std::thread([&]() {
638       while (!go.load()) {
639         sched_yield();
640       }
641       std::atomic<size_t> localCounter(0);
642       std::atomic<int> localWork(0);
643       for (size_t i = iters; i > 0; --i) {
644         localCounter++;
645         for (size_t j = work; j > 0; --j) {
646           localWork.load();
647         }
648       }
649     }));
650   }
651
652   braces.dismiss();
653   go = true;
654
655   for (auto& thr : threads) {
656     thr.join();
657   }
658 }
659
660 BENCHMARK_DRAW_LINE()
661
662 BENCHMARK_NAMED_PARAM(contentionAtWidth, 1_stripe_0_work_stub,
663                       1, 0, SpreaderType::GETCPU)
664 BENCHMARK_NAMED_PARAM(contentionAtWidth, 2_stripe_0_work_getcpu,
665                       2, 0, SpreaderType::GETCPU)
666 BENCHMARK_NAMED_PARAM(contentionAtWidth, 4_stripe_0_work_getcpu,
667                       4, 0, SpreaderType::GETCPU)
668 BENCHMARK_NAMED_PARAM(contentionAtWidth, 8_stripe_0_work_getcpu,
669                       8, 0, SpreaderType::GETCPU)
670 BENCHMARK_NAMED_PARAM(contentionAtWidth, 16_stripe_0_work_getcpu,
671                       16, 0, SpreaderType::GETCPU)
672 BENCHMARK_NAMED_PARAM(contentionAtWidth, 32_stripe_0_work_getcpu,
673                       32, 0, SpreaderType::GETCPU)
674 BENCHMARK_NAMED_PARAM(contentionAtWidth, 64_stripe_0_work_getcpu,
675                       64, 0, SpreaderType::GETCPU)
676 BENCHMARK_NAMED_PARAM(contentionAtWidth, 2_stripe_0_work_tls_rr,
677                       2, 0, SpreaderType::TLS_RR)
678 BENCHMARK_NAMED_PARAM(contentionAtWidth, 4_stripe_0_work_tls_rr,
679                       4, 0, SpreaderType::TLS_RR)
680 BENCHMARK_NAMED_PARAM(contentionAtWidth, 8_stripe_0_work_tls_rr,
681                       8, 0, SpreaderType::TLS_RR)
682 BENCHMARK_NAMED_PARAM(contentionAtWidth, 16_stripe_0_work_tls_rr,
683                       16, 0, SpreaderType::TLS_RR)
684 BENCHMARK_NAMED_PARAM(contentionAtWidth, 32_stripe_0_work_tls_rr,
685                       32, 0, SpreaderType::TLS_RR)
686 BENCHMARK_NAMED_PARAM(contentionAtWidth, 64_stripe_0_work_tls_rr,
687                       64, 0, SpreaderType::TLS_RR)
688 BENCHMARK_NAMED_PARAM(contentionAtWidth,
689                       2_stripe_0_work_pthread_self,
690                       2,
691                       0,
692                       SpreaderType::PTHREAD_SELF)
693 BENCHMARK_NAMED_PARAM(contentionAtWidth,
694                       4_stripe_0_work_pthread_self,
695                       4,
696                       0,
697                       SpreaderType::PTHREAD_SELF)
698 BENCHMARK_NAMED_PARAM(contentionAtWidth,
699                       8_stripe_0_work_pthread_self,
700                       8,
701                       0,
702                       SpreaderType::PTHREAD_SELF)
703 BENCHMARK_NAMED_PARAM(contentionAtWidth,
704                       16_stripe_0_work_pthread_self,
705                       16,
706                       0,
707                       SpreaderType::PTHREAD_SELF)
708 BENCHMARK_NAMED_PARAM(contentionAtWidth,
709                       32_stripe_0_work_pthread_self,
710                       32,
711                       0,
712                       SpreaderType::PTHREAD_SELF)
713 BENCHMARK_NAMED_PARAM(contentionAtWidth,
714                       64_stripe_0_work_pthread_self,
715                       64,
716                       0,
717                       SpreaderType::PTHREAD_SELF)
718 BENCHMARK_NAMED_PARAM(contentionAtWidth, 2_stripe_0_work_shared,
719                       2, 0, SpreaderType::SHARED)
720 BENCHMARK_NAMED_PARAM(contentionAtWidth, 4_stripe_0_work_shared,
721                       4, 0, SpreaderType::SHARED)
722 BENCHMARK_NAMED_PARAM(contentionAtWidth, 8_stripe_0_work_shared,
723                       8, 0, SpreaderType::SHARED)
724 BENCHMARK_NAMED_PARAM(contentionAtWidth, 16_stripe_0_work_shared,
725                       16, 0, SpreaderType::SHARED)
726 BENCHMARK_NAMED_PARAM(contentionAtWidth, 32_stripe_0_work_shared,
727                       32, 0, SpreaderType::SHARED)
728 BENCHMARK_NAMED_PARAM(contentionAtWidth, 64_stripe_0_work_shared,
729                       64, 0, SpreaderType::SHARED)
730 BENCHMARK_NAMED_PARAM(atomicIncrBaseline, local_incr_0_work, 0)
731 BENCHMARK_DRAW_LINE()
732 BENCHMARK_NAMED_PARAM(contentionAtWidth, 1_stripe_500_work_stub,
733                       1, 500, SpreaderType::GETCPU)
734 BENCHMARK_NAMED_PARAM(contentionAtWidth, 2_stripe_500_work_getcpu,
735                       2, 500, SpreaderType::GETCPU)
736 BENCHMARK_NAMED_PARAM(contentionAtWidth, 4_stripe_500_work_getcpu,
737                       4, 500, SpreaderType::GETCPU)
738 BENCHMARK_NAMED_PARAM(contentionAtWidth, 8_stripe_500_work_getcpu,
739                       8, 500, SpreaderType::GETCPU)
740 BENCHMARK_NAMED_PARAM(contentionAtWidth, 16_stripe_500_work_getcpu,
741                       16, 500, SpreaderType::GETCPU)
742 BENCHMARK_NAMED_PARAM(contentionAtWidth, 32_stripe_500_work_getcpu,
743                       32, 500, SpreaderType::GETCPU)
744 BENCHMARK_NAMED_PARAM(atomicIncrBaseline, local_incr_500_work, 500)
745 BENCHMARK_DRAW_LINE()
746 BENCHMARK_NAMED_PARAM(contentionAtWidth, 1_stripe_1000_work_stub,
747                       1, 1000, SpreaderType::GETCPU)
748 BENCHMARK_NAMED_PARAM(contentionAtWidth, 2_stripe_1000_work_getcpu,
749                       2, 1000, SpreaderType::GETCPU)
750 BENCHMARK_NAMED_PARAM(contentionAtWidth, 4_stripe_1000_work_getcpu,
751                       4, 1000, SpreaderType::GETCPU)
752 BENCHMARK_NAMED_PARAM(contentionAtWidth, 8_stripe_1000_work_getcpu,
753                       8, 1000, SpreaderType::GETCPU)
754 BENCHMARK_NAMED_PARAM(contentionAtWidth, 16_stripe_1000_work_getcpu,
755                       16, 1000, SpreaderType::GETCPU)
756 BENCHMARK_NAMED_PARAM(contentionAtWidth, 32_stripe_1000_work_getcpu,
757                       32, 1000, SpreaderType::GETCPU)
758 BENCHMARK_NAMED_PARAM(atomicIncrBaseline, local_incr_1000_work, 1000)
759
760
761 int main(int argc, char** argv) {
762   testing::InitGoogleTest(&argc, argv);
763   gflags::ParseCommandLineFlags(&argc, &argv, true);
764   auto ret = RUN_ALL_TESTS();
765   if (!ret && FLAGS_benchmark) {
766     folly::runBenchmarks();
767   }
768   return ret;
769 }