folly/test/AtomicUnorderedMapTest.cpp

   1 /*
   2  * Copyright 2015 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16 #include <folly/AtomicUnorderedMap.h>
  17 #include <folly/test/DeterministicSchedule.h>
  18 #include <thread>
  19 #include <semaphore.h>
  20 #include <gflags/gflags.h>
  21 #include <gtest/gtest.h>
  22 #include <folly/Benchmark.h>
  23 #include <unordered_map>
  24
  25 using namespace folly;
  26 using namespace folly::test;
  27
  28 template<class T>
  29 struct non_atomic {
  30   T value;
  31
  32   non_atomic() = default;
  33   non_atomic(const non_atomic&) = delete;
  34   constexpr /* implicit */ non_atomic(T desired): value(desired) {}
  35
  36   T operator+=(T arg) { value += arg; return load();}
  37
  38   T load(std::memory_order order= std::memory_order_seq_cst) const {
  39     return value;
  40   }
  41
  42   /* implicit */
  43   operator T() const {return load();}
  44
  45   void store(T desired, std::memory_order order = std::memory_order_seq_cst) {
  46     value = desired;
  47   }
  48
  49   T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) {
  50     T old = load();
  51     store(desired);
  52     return old;
  53   }
  54
  55   bool compare_exchange_weak(
  56       T& expected, T desired,
  57       std::memory_order success = std::memory_order_seq_cst,
  58       std::memory_order failure = std::memory_order_seq_cst) {
  59     if (value == expected) {
  60       value = desired;
  61       return true;
  62     }
  63
  64     expected = value;
  65     return false;
  66   }
  67
  68   bool compare_exchange_strong(
  69       T& expected, T desired,
  70       std::memory_order success = std::memory_order_seq_cst,
  71       std::memory_order failure = std::memory_order_seq_cst) {
  72     if (value == expected) {
  73       value = desired;
  74       return true;
  75     }
  76
  77     expected = value;
  78     return false;
  79   }
  80
  81   bool is_lock_free() const {return true;}
  82 };
  83
  84 template<
  85     typename Key, typename Value, template<typename> class Atom = non_atomic>
  86 using UnorderedInsertMap =  AtomicUnorderedInsertMap<
  87     Key,
  88     Value,
  89     std::hash<Key>,
  90     std::equal_to<Key>,
  91     (boost::has_trivial_destructor<Key>::value &&
  92      boost::has_trivial_destructor<Value>::value),
  93     Atom,
  94     std::allocator<char>>;
  95
  96 TEST(AtomicUnorderedInsertMap, basic) {
  97   AtomicUnorderedInsertMap<std::string,std::string> m(100);
  98
  99   m.emplace("abc", "ABC");
 100   EXPECT_TRUE(m.find("abc") != m.cend());
 101   EXPECT_EQ(m.find("abc")->first, "abc");
 102   EXPECT_EQ(m.find("abc")->second, "ABC");
 103   EXPECT_TRUE(m.find("def") == m.cend());
 104   auto iter = m.cbegin();
 105   EXPECT_TRUE(iter != m.cend());
 106   EXPECT_TRUE(iter == m.find("abc"));
 107   auto a = iter;
 108   EXPECT_TRUE(a == iter);
 109   auto b = iter;
 110   ++iter;
 111   EXPECT_TRUE(iter == m.cend());
 112   EXPECT_TRUE(a == b);
 113   EXPECT_TRUE(a != iter);
 114   a++;
 115   EXPECT_TRUE(a == iter);
 116   EXPECT_TRUE(a != b);
 117 }
 118
 119 TEST(AtomicUnorderedInsertMap, value_mutation) {
 120   AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
 121
 122   for (int i = 0; i < 50; ++i) {
 123     m.emplace(i, i);
 124   }
 125
 126   m.find(1)->second.data++;
 127 }
 128
 129 TEST(UnorderedInsertMap, value_mutation) {
 130   UnorderedInsertMap<int, MutableData<int>> m(100);
 131
 132   for (int i = 0; i < 50; ++i) {
 133     m.emplace(i, i);
 134   }
 135
 136   m.find(1)->second.data++;
 137   EXPECT_EQ(m.find(1)->second.data, 2);
 138 }
 139
 140 BENCHMARK(lookup_int_int_hit, iters) {
 141   std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
 142
 143   size_t capacity = 100000;
 144
 145   BENCHMARK_SUSPEND {
 146     ptr.reset(new AtomicUnorderedInsertMap<int,size_t>(capacity));
 147     for (size_t i = 0; i < capacity; ++i) {
 148       auto k = 3 * ((5641 * i) % capacity);
 149       ptr->emplace(k, k + 1);
 150       EXPECT_EQ(ptr->find(k)->second, k + 1);
 151     }
 152   }
 153
 154   for (size_t i = 0; i < iters; ++i) {
 155     size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
 156     auto iter = ptr->find(k);
 157     if (iter == ptr->cend() ||
 158         iter->second != k + 1) {
 159       auto jter = ptr->find(k);
 160       EXPECT_TRUE(iter == jter);
 161     }
 162     EXPECT_EQ(iter->second, k + 1);
 163   }
 164
 165   BENCHMARK_SUSPEND {
 166     ptr.reset(nullptr);
 167   }
 168 }
 169
 170 struct PairHash {
 171   size_t operator()(const std::pair<uint64_t,uint64_t>& pr) const {
 172     return pr.first ^ pr.second;
 173   }
 174 };
 175
 176 void contendedRW(size_t itersPerThread,
 177                  size_t capacity,
 178                  size_t numThreads,
 179                  size_t readsPerWrite) {
 180   typedef std::pair<uint64_t,uint64_t> Key;
 181   typedef AtomicUnorderedInsertMap<Key,MutableAtom<uint32_t>,PairHash> Map;
 182
 183   std::unique_ptr<Map> ptr = {};
 184   std::atomic<bool> go;
 185   std::vector<std::thread> threads;
 186
 187   BENCHMARK_SUSPEND {
 188     ptr.reset(new Map(capacity));
 189     while (threads.size() < numThreads) {
 190       threads.emplace_back([&](){
 191         while (!go) {
 192           std::this_thread::yield();
 193         }
 194
 195         size_t reads = 0;
 196         size_t writes = 0;
 197         while (reads + writes < itersPerThread) {
 198           auto r = Random::rand32();
 199           Key key(reads + writes, r);
 200           if (reads < writes * readsPerWrite ||
 201               writes >= capacity / numThreads) {
 202             // read needed
 203             ++reads;
 204             auto iter = ptr->find(key);
 205             EXPECT_TRUE(
 206                 iter == ptr->cend() ||
 207                 iter->second.data.load(std::memory_order_acquire) >= key.first);
 208           } else {
 209             ++writes;
 210             try {
 211               auto pr = ptr->emplace(key, key.first);
 212               if (!pr.second) {
 213                 pr.first->second.data++;
 214               }
 215             } catch (std::bad_alloc& x) {
 216               LOG(INFO) << "bad alloc";
 217             }
 218           }
 219         }
 220       });
 221     }
 222   }
 223
 224   go = true;
 225
 226   for (auto& thr : threads) {
 227     thr.join();
 228   }
 229
 230   BENCHMARK_SUSPEND {
 231     ptr.reset(nullptr);
 232   }
 233 }
 234
 235 // sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
 236 //
 237 // without MAP_HUGETLB (default)
 238 //
 239 // ============================================================================
 240 // common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative  time/iter
 241 //   iters/s
 242 // ============================================================================
 243 // lookup_int_int_hit                                          20.05ns   49.89M
 244 // contendedRW(small_32thr_99pct)                              70.36ns   14.21M
 245 // contendedRW(large_32thr_99pct)                             164.23ns    6.09M
 246 // contendedRW(large_32thr_99_9pct)                           158.81ns    6.30M
 247 // ============================================================================
 248 //
 249 // with MAP_HUGETLB hacked in
 250 // ============================================================================
 251 // lookup_int_int_hit                                          19.67ns   50.84M
 252 // contendedRW(small_32thr_99pct)                              62.46ns   16.01M
 253 // contendedRW(large_32thr_99pct)                             119.41ns    8.37M
 254 // contendedRW(large_32thr_99_9pct)                           111.23ns    8.99M
 255 // ============================================================================
 256 BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
 257 BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
 258 BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)
 259
 260 BENCHMARK_DRAW_LINE();
 261
 262 // sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
 263 // Single threaded benchmarks to test how much better we are than
 264 // std::unordered_map and what is the cost of using atomic operations
 265 // in the uncontended use case
 266 // ============================================================================
 267 // std_map                                                      1.20ms   832.58
 268 // atomic_fast_map                                            511.35us    1.96K
 269 // fast_map                                                   196.28us    5.09K
 270 // ============================================================================
 271
 272 BENCHMARK(std_map) {
 273   std::unordered_map<long, long> m;
 274   m.reserve(10000);
 275   for (int i=0; i<10000; ++i) {
 276     m.emplace(i,i);
 277   }
 278
 279   for (int i=0; i<10000; ++i) {
 280     auto a = m.find(i);
 281     folly::doNotOptimizeAway(&*a);
 282   }
 283 }
 284
 285 BENCHMARK(atomic_fast_map) {
 286   UnorderedInsertMap<long, long, std::atomic> m(10000);
 287   for (int i=0; i<10000; ++i) {
 288     m.emplace(i,i);
 289   }
 290
 291   for (int i=0; i<10000; ++i) {
 292     auto a = m.find(i);
 293     folly::doNotOptimizeAway(&*a);
 294   }
 295 }
 296
 297 BENCHMARK(fast_map) {
 298   UnorderedInsertMap<long, long> m(10000);
 299   for (int i=0; i<10000; ++i) {
 300     m.emplace(i,i);
 301   }
 302
 303   for (int i=0; i<10000; ++i) {
 304     auto a = m.find(i);
 305     folly::doNotOptimizeAway(&*a);
 306   }
 307 }
 308
 309
 310 int main(int argc, char ** argv) {
 311   testing::InitGoogleTest(&argc, argv);
 312   google::ParseCommandLineFlags(&argc, &argv, true);
 313   int rv = RUN_ALL_TESTS();
 314   folly::runBenchmarksOnFlag();
 315   return rv;
 316 }