2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 #include <folly/AtomicUnorderedMap.h>
17 #include <folly/test/DeterministicSchedule.h>
19 #include <semaphore.h>
20 #include <gflags/gflags.h>
21 #include <gtest/gtest.h>
22 #include <folly/Benchmark.h>
23 #include <unordered_map>
25 using namespace folly;
26 using namespace folly::test;
// Interior of non_atomic<T> (the struct's declaration line is not visible in
// this chunk): a deliberately NON-atomic drop-in replacement for
// std::atomic<T>, presumably used as the Atom template parameter so the
// single-threaded benchmarks below can measure the cost of real atomics
// against plain loads/stores — confirm against the full file.
// NOTE(review): several method bodies are elided from this view; the
// std::memory_order parameters appear to exist only for interface
// compatibility with std::atomic — verify they are ignored in the full file.
non_atomic() = default;
non_atomic(const non_atomic&) = delete;  // mirrors std::atomic: not copyable
constexpr /* implicit */ non_atomic(T desired): value(desired) {}
// Add-and-fetch, like std::atomic's operator+=, but with no synchronization.
T operator+=(T arg) { value += arg; return load();}
// Plain read of the stored value (body elided from this view).
T load(std::memory_order order= std::memory_order_seq_cst) const {
operator T() const {return load();}
// Plain write (body elided from this view).
void store(T desired, std::memory_order order = std::memory_order_seq_cst) {
// Returns the previous value and stores `desired` (body elided from this view).
T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) {
// Compare-and-swap with no actual atomicity; swap branch elided from this view.
bool compare_exchange_weak(
    T& expected, T desired,
    std::memory_order success = std::memory_order_seq_cst,
    std::memory_order failure = std::memory_order_seq_cst) {
  if (value == expected) {
// Same contract as compare_exchange_weak above (no spurious failure either,
// since nothing here is actually concurrent); swap branch elided from this view.
bool compare_exchange_strong(
    T& expected, T desired,
    std::memory_order success = std::memory_order_seq_cst,
    std::memory_order failure = std::memory_order_seq_cst) {
  if (value == expected) {
// Trivially "lock free": no locking (or synchronization) is performed at all.
bool is_lock_free() const {return true;}
// Alias of AtomicUnorderedInsertMap whose Atom parameter defaults to the
// non_atomic stand-in above, giving a single-threaded (unsynchronized)
// variant for baseline benchmarking.  NOTE(review): the `template <...>`
// header line and the middle template arguments (Key/Value/index/hash, etc.)
// are elided from this view — see the full file.
typename Key, typename Value, template<typename> class Atom = non_atomic>
using UnorderedInsertMap = AtomicUnorderedInsertMap<
    // Skip-destruction flag: true when both Key and Value are trivially
    // destructible, detected via boost type traits.
    (boost::has_trivial_destructor<Key>::value &&
     boost::has_trivial_destructor<Value>::value),
    std::allocator<char>>;
// Smoke test: insert a single string->string entry, then exercise find(),
// const iteration, and iterator equality.  NOTE(review): the declaration of
// `a` (presumably a copy of `iter`), the iterator increments between the
// equality checks, and the closing brace are on lines elided from this view.
TEST(AtomicUnorderedInsertMap, basic) {
  AtomicUnorderedInsertMap<std::string,std::string> m(100);
  m.emplace("abc", "ABC");
  // Lookup of the inserted key succeeds and returns the stored pair.
  EXPECT_TRUE(m.find("abc") != m.cend());
  EXPECT_EQ(m.find("abc")->first, "abc");
  EXPECT_EQ(m.find("abc")->second, "ABC");
  // Lookup of an absent key yields the end iterator.
  EXPECT_TRUE(m.find("def") == m.cend());
  // With exactly one element, cbegin() must point at it.
  auto iter = m.cbegin();
  EXPECT_TRUE(iter != m.cend());
  EXPECT_TRUE(iter == m.find("abc"));
  EXPECT_TRUE(a == iter);
  EXPECT_TRUE(iter == m.cend());
  EXPECT_TRUE(a != iter);
  EXPECT_TRUE(a == iter);
// Values wrapped in MutableAtom<int> can be mutated in place through the
// iterator returned by find(), even though keys are insert-only.
// NOTE(review): the emplace calls inside the loop, any EXPECTs, and the
// closing braces are on lines elided from this view.
TEST(AtomicUnorderedInsertMap, value_mutation) {
  AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
  for (int i = 0; i < 50; ++i) {
  m.find(1)->second.data++;
// Same in-place mutation test as above, but against the non-atomic
// UnorderedInsertMap alias with MutableData values; confirms the increment
// is observable (value 2 after one ++ on a presumably-1 initial value —
// the emplace lines are elided from this view).
TEST(UnorderedInsertMap, value_mutation) {
  UnorderedInsertMap<int, MutableData<int>> m(100);
  for (int i = 0; i < 50; ++i) {
  m.find(1)->second.data++;
  EXPECT_EQ(m.find(1)->second.data, 2);
// Benchmark: successful lookups in a pre-populated int->size_t map.
// The map is built once (presumably inside a BENCHMARK_SUSPEND block on
// lines elided from this view) and then `iters` finds are timed.
BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
  size_t capacity = 100000;
  ptr.reset(new AtomicUnorderedInsertMap<int,size_t>(capacity));
  // Keys are multiples of 3; (5641 * i) % capacity scrambles insertion order.
  // Each value is key + 1 so hits are cheaply checkable.
  for (size_t i = 0; i < capacity; ++i) {
    auto k = 3 * ((5641 * i) % capacity);
    ptr->emplace(k, k + 1);
    EXPECT_EQ(ptr->find(k)->second, k + 1);
  // Timed section: pseudo-random existing keys, every lookup should hit.
  for (size_t i = 0; i < iters; ++i) {
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() ||
        iter->second != k + 1) {
      // On an unexpected miss, redo the lookup (presumably so both results
      // can be inspected/compared on failure — confirm intent in full file).
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    EXPECT_EQ(iter->second, k + 1);
// Hash functor for the pair<uint64_t,uint64_t> keys used by contendedRW
// below (the enclosing struct's declaration and closing brace are on lines
// elided from this view).  XOR of the halves is cheap but weak; fine for a
// benchmark driver.
size_t operator()(const std::pair<uint64_t,uint64_t>& pr) const {
  return pr.first ^ pr.second;
// Multi-threaded contention benchmark: numThreads workers each perform
// itersPerThread operations, mixing roughly readsPerWrite reads per write
// against a shared AtomicUnorderedInsertMap.  NOTE(review): this view elides
// two signature parameters (presumably `size_t capacity, size_t numThreads`
// — both names are used below), the per-thread reads/writes counters, the
// try that pairs with the catch, the start-flag handshake, and the joins.
void contendedRW(size_t itersPerThread,
    size_t readsPerWrite) {
  typedef std::pair<uint64_t,uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key,MutableAtom<uint32_t>,PairHash> Map;
  std::unique_ptr<Map> ptr = {};
  // Presumably a start gate the workers spin on (the yield below suggests a
  // wait loop) — confirm against the elided lines.
  std::atomic<bool> go;
  std::vector<std::thread> threads;
  ptr.reset(new Map(capacity));
  while (threads.size() < numThreads) {
    threads.emplace_back([&](){
      std::this_thread::yield();
      while (reads + writes < itersPerThread) {
        auto r = Random::rand32();
        Key key(reads + writes, r);
        // Read path: taken to keep the read:write ratio near readsPerWrite,
        // and forced once this thread's share of capacity is written.
        if (reads < writes * readsPerWrite ||
            writes >= capacity / numThreads) {
          auto iter = ptr->find(key);
          iter == ptr->cend() ||
          // acquire pairs with the writer's publication of the value.
          iter->second.data.load(std::memory_order_acquire) >= key.first);
          // Write path: insert (or find existing) and bump the mutable value.
          auto pr = ptr->emplace(key, key.first);
          pr.first->second.data++;
        } catch (std::bad_alloc& x) {
          // Insert-only map can exhaust its fixed allocation; log and go on.
          LOG(INFO) << "bad alloc";
  for (auto& thr : threads) {
235 // sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
237 // without MAP_HUGETLB (default)
239 // ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpp  relative  time/iter
242 // ============================================================================
243 // lookup_int_int_hit 20.05ns 49.89M
244 // contendedRW(small_32thr_99pct) 70.36ns 14.21M
245 // contendedRW(large_32thr_99pct) 164.23ns 6.09M
246 // contendedRW(large_32thr_99_9pct) 158.81ns 6.30M
247 // ============================================================================
249 // with MAP_HUGETLB hacked in
250 // ============================================================================
251 // lookup_int_int_hit 19.67ns 50.84M
252 // contendedRW(small_32thr_99pct) 62.46ns 16.01M
253 // contendedRW(large_32thr_99pct) 119.41ns 8.37M
254 // contendedRW(large_32thr_99_9pct) 111.23ns 8.99M
255 // ============================================================================
// Benchmark instantiations of contendedRW.  The three literal arguments are
// presumably (capacity, numThreads, readsPerWrite) — the middle parameters
// of contendedRW's signature are on lines elided from this view; confirm.
// "99pct" / "99_9pct" in the names refer to the read percentage.
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)
BENCHMARK_DRAW_LINE();
262 // sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
263 // Single threaded benchmarks to test how much better we are than
264 // std::unordered_map and what is the cost of using atomic operations
265 // in the uncontended use case
266 // ============================================================================
267 // std_map 1.20ms 832.58
268 // atomic_fast_map 511.35us 1.96K
269 // fast_map 196.28us 5.09K
270 // ============================================================================
// Single-threaded std::unordered_map baseline (the BENCHMARK(std_map)
// header line, the loop bodies that fill and then look up 10000 keys, and
// the declaration of `a` are on lines elided from this view).
std::unordered_map<long, long> m;
for (int i=0; i<10000; ++i) {
for (int i=0; i<10000; ++i) {
  // Prevent the compiler from optimizing the lookup away.
  folly::doNotOptimizeAway(&*a);
// Same single-threaded fill-then-lookup workload against UnorderedInsertMap
// with real std::atomic slots — measures the cost of atomic operations in
// the uncontended case (loop bodies and the declaration of `a` are on lines
// elided from this view).
BENCHMARK(atomic_fast_map) {
  UnorderedInsertMap<long, long, std::atomic> m(10000);
  for (int i=0; i<10000; ++i) {
  for (int i=0; i<10000; ++i) {
    // Prevent the compiler from optimizing the lookup away.
    folly::doNotOptimizeAway(&*a);
// Same workload with the default non_atomic Atom — the fully unsynchronized
// upper bound to compare against atomic_fast_map above (loop bodies and the
// declaration of `a` are on lines elided from this view).
BENCHMARK(fast_map) {
  UnorderedInsertMap<long, long> m(10000);
  for (int i=0; i<10000; ++i) {
  for (int i=0; i<10000; ++i) {
    // Prevent the compiler from optimizing the lookup away.
    folly::doNotOptimizeAway(&*a);
310 int main(int argc, char ** argv) {
311 testing::InitGoogleTest(&argc, argv);
312 google::ParseCommandLineFlags(&argc, &argv, true);
313 int rv = RUN_ALL_TESTS();
314 folly::runBenchmarksOnFlag();