folly/test/AtomicUnorderedMapTest.cpp

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <folly/AtomicUnorderedMap.h>
  18
  19 #include <semaphore.h>
  20 #include <thread>
  21 #include <unordered_map>
  22
  23 #include <folly/Benchmark.h>
  24 #include <folly/portability/GFlags.h>
  25 #include <folly/portability/GTest.h>
  26 #include <folly/test/DeterministicSchedule.h>
  27
  28 using namespace folly;
  29 using namespace folly::test;
  30
  31 template<class T>
  32 struct non_atomic {
  33   T value;
  34
  35   non_atomic() = default;
  36   non_atomic(const non_atomic&) = delete;
  37   constexpr /* implicit */ non_atomic(T desired): value(desired) {}
  38
  39   T operator+=(T arg) { value += arg; return load();}
  40
  41   T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
  42     return value;
  43   }
  44
  45   /* implicit */
  46   operator T() const {return load();}
  47
  48   void store(T desired,
  49              std::memory_order /* order */ = std::memory_order_seq_cst) {
  50     value = desired;
  51   }
  52
  53   T exchange(T desired,
  54              std::memory_order /* order */ = std::memory_order_seq_cst) {
  55     T old = load();
  56     store(desired);
  57     return old;
  58   }
  59
  60   bool compare_exchange_weak(
  61       T& expected,
  62       T desired,
  63       std::memory_order /* success */ = std::memory_order_seq_cst,
  64       std::memory_order /* failure */ = std::memory_order_seq_cst) {
  65     if (value == expected) {
  66       value = desired;
  67       return true;
  68     }
  69
  70     expected = value;
  71     return false;
  72   }
  73
  74   bool compare_exchange_strong(
  75       T& expected,
  76       T desired,
  77       std::memory_order /* success */ = std::memory_order_seq_cst,
  78       std::memory_order /* failure */ = std::memory_order_seq_cst) {
  79     if (value == expected) {
  80       value = desired;
  81       return true;
  82     }
  83
  84     expected = value;
  85     return false;
  86   }
  87
  88   bool is_lock_free() const {return true;}
  89 };
  90
  91 template <typename Key,
  92           typename Value,
  93           typename IndexType,
  94           template <typename> class Atom = std::atomic,
  95           typename Allocator = std::allocator<char>>
  96 using UIM =
  97     AtomicUnorderedInsertMap<Key,
  98                              Value,
  99                              std::hash<Key>,
 100                              std::equal_to<Key>,
 101                              (boost::has_trivial_destructor<Key>::value &&
 102                               boost::has_trivial_destructor<Value>::value),
 103                              Atom,
 104                              IndexType,
 105                              Allocator>;
 106
 107 namespace {
 108 template <typename T>
 109 struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
 110 }
 111
 112 // uint16_t doesn't make sense for most platforms, but we might as well
 113 // test it
 114 using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
 115 TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
 116
 117 TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
 118   UIM<std::string,
 119       std::string,
 120       TypeParam,
 121       std::atomic,
 122       folly::detail::MMapAlloc> m(100);
 123
 124   m.emplace("abc", "ABC");
 125   EXPECT_TRUE(m.find("abc") != m.cend());
 126   EXPECT_EQ(m.find("abc")->first, "abc");
 127   EXPECT_EQ(m.find("abc")->second, "ABC");
 128   EXPECT_TRUE(m.find("def") == m.cend());
 129   auto iter = m.cbegin();
 130   EXPECT_TRUE(iter != m.cend());
 131   EXPECT_TRUE(iter == m.find("abc"));
 132   auto a = iter;
 133   EXPECT_TRUE(a == iter);
 134   auto b = iter;
 135   ++iter;
 136   EXPECT_TRUE(iter == m.cend());
 137   EXPECT_TRUE(a == b);
 138   EXPECT_TRUE(a != iter);
 139   a++;
 140   EXPECT_TRUE(a == iter);
 141   EXPECT_TRUE(a != b);
 142 }
 143
 144 TEST(AtomicUnorderedInsertMap, load_factor) {
 145   AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);
 146
 147   // we should be able to put in much more than 5000 things because of
 148   // our load factor request
 149   for (int i = 0; i < 10000; ++i) {
 150     m.emplace(i, true);
 151   }
 152 }
 153
 154 TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
 155   AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);
 156
 157   EXPECT_THROW({
 158     for (int i = 0; i < 6000; ++i) {
 159       m.emplace(i, false);
 160     }
 161   }, std::bad_alloc);
 162 }
 163
 164 TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
 165   UIM<int, MutableAtom<int>, TypeParam> m(100);
 166
 167   for (int i = 0; i < 50; ++i) {
 168     m.emplace(i, i);
 169   }
 170
 171   m.find(1)->second.data++;
 172 }
 173
 174 TEST(UnorderedInsertMap, value_mutation) {
 175   UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);
 176
 177   for (int i = 0; i < 50; ++i) {
 178     m.emplace(i, i);
 179   }
 180
 181   m.find(1)->second.data++;
 182   EXPECT_EQ(m.find(1)->second.data, 2);
 183 }
 184
 185 // This test is too expensive to run automatically.  On my dev server it
 186 // takes about 10 minutes for dbg build, 2 for opt.
 187 TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
 188   size_t capacity = 2000000000;
 189   AtomicUnorderedInsertMap64<size_t,size_t> big(capacity);
 190   for (size_t i = 0; i < capacity * 2; i += 2) {
 191     big.emplace(i, i * 10);
 192   }
 193   for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
 194     auto iter = big.find(i);
 195     if ((i & 1) == 0 && i < capacity * 2) {
 196       EXPECT_EQ(iter->second, i * 10);
 197     } else {
 198       EXPECT_TRUE(iter == big.cend());
 199     }
 200   }
 201 }
 202
 203 BENCHMARK(lookup_int_int_hit, iters) {
 204   std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
 205
 206   size_t capacity = 100000;
 207
 208   BENCHMARK_SUSPEND {
 209     ptr.reset(new AtomicUnorderedInsertMap<int,size_t>(capacity));
 210     for (size_t i = 0; i < capacity; ++i) {
 211       auto k = 3 * ((5641 * i) % capacity);
 212       ptr->emplace(k, k + 1);
 213       EXPECT_EQ(ptr->find(k)->second, k + 1);
 214     }
 215   }
 216
 217   for (size_t i = 0; i < iters; ++i) {
 218     size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
 219     auto iter = ptr->find(k);
 220     if (iter == ptr->cend() ||
 221         iter->second != k + 1) {
 222       auto jter = ptr->find(k);
 223       EXPECT_TRUE(iter == jter);
 224     }
 225     EXPECT_EQ(iter->second, k + 1);
 226   }
 227
 228   BENCHMARK_SUSPEND {
 229     ptr.reset(nullptr);
 230   }
 231 }
 232
 233 struct PairHash {
 234   size_t operator()(const std::pair<uint64_t,uint64_t>& pr) const {
 235     return pr.first ^ pr.second;
 236   }
 237 };
 238
 239 void contendedRW(size_t itersPerThread,
 240                  size_t capacity,
 241                  size_t numThreads,
 242                  size_t readsPerWrite) {
 243   typedef std::pair<uint64_t,uint64_t> Key;
 244   typedef AtomicUnorderedInsertMap<Key,MutableAtom<uint32_t>,PairHash> Map;
 245
 246   std::unique_ptr<Map> ptr = {};
 247   std::atomic<bool> go;
 248   std::vector<std::thread> threads;
 249
 250   BENCHMARK_SUSPEND {
 251     ptr.reset(new Map(capacity));
 252     while (threads.size() < numThreads) {
 253       threads.emplace_back([&](){
 254         while (!go) {
 255           std::this_thread::yield();
 256         }
 257
 258         size_t reads = 0;
 259         size_t writes = 0;
 260         while (reads + writes < itersPerThread) {
 261           auto r = Random::rand32();
 262           Key key(reads + writes, r);
 263           if (reads < writes * readsPerWrite ||
 264               writes >= capacity / numThreads) {
 265             // read needed
 266             ++reads;
 267             auto iter = ptr->find(key);
 268             EXPECT_TRUE(
 269                 iter == ptr->cend() ||
 270                 iter->second.data.load(std::memory_order_acquire) >= key.first);
 271           } else {
 272             ++writes;
 273             try {
 274               auto pr = ptr->emplace(key, key.first);
 275               if (!pr.second) {
 276                 pr.first->second.data++;
 277               }
 278             } catch (std::bad_alloc& x) {
 279               LOG(INFO) << "bad alloc";
 280             }
 281           }
 282         }
 283       });
 284     }
 285   }
 286
 287   go = true;
 288
 289   for (auto& thr : threads) {
 290     thr.join();
 291   }
 292
 293   BENCHMARK_SUSPEND {
 294     ptr.reset(nullptr);
 295   }
 296 }
 297
 298 // sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
 299 //
 300 // without MAP_HUGETLB (default)
 301 //
 302 // ============================================================================
 303 // common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative  time/iter
 304 //   iters/s
 305 // ============================================================================
 306 // lookup_int_int_hit                                          20.05ns   49.89M
 307 // contendedRW(small_32thr_99pct)                              70.36ns   14.21M
 308 // contendedRW(large_32thr_99pct)                             164.23ns    6.09M
 309 // contendedRW(large_32thr_99_9pct)                           158.81ns    6.30M
 310 // ============================================================================
 311 //
 312 // with MAP_HUGETLB hacked in
 313 // ============================================================================
 314 // lookup_int_int_hit                                          19.67ns   50.84M
 315 // contendedRW(small_32thr_99pct)                              62.46ns   16.01M
 316 // contendedRW(large_32thr_99pct)                             119.41ns    8.37M
 317 // contendedRW(large_32thr_99_9pct)                           111.23ns    8.99M
 318 // ============================================================================
 319 BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
 320 BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
 321 BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)
 322
 323 BENCHMARK_DRAW_LINE();
 324
 325 // sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
 326 // Single threaded benchmarks to test how much better we are than
 327 // std::unordered_map and what is the cost of using atomic operations
 328 // in the uncontended use case
 329 // ============================================================================
 330 // std_map                                                      1.20ms   832.58
 331 // atomic_fast_map                                            511.35us    1.96K
 332 // fast_map                                                   196.28us    5.09K
 333 // ============================================================================
 334
 335 BENCHMARK(std_map) {
 336   std::unordered_map<long, long> m;
 337   m.reserve(10000);
 338   for (int i=0; i<10000; ++i) {
 339     m.emplace(i,i);
 340   }
 341
 342   for (int i=0; i<10000; ++i) {
 343     auto a = m.find(i);
 344     folly::doNotOptimizeAway(&*a);
 345   }
 346 }
 347
 348 BENCHMARK(atomic_fast_map) {
 349   UIM<long, long, uint32_t, std::atomic> m(10000);
 350   for (int i=0; i<10000; ++i) {
 351     m.emplace(i,i);
 352   }
 353
 354   for (int i=0; i<10000; ++i) {
 355     auto a = m.find(i);
 356     folly::doNotOptimizeAway(&*a);
 357   }
 358 }
 359
 360 BENCHMARK(fast_map) {
 361   UIM<long, long, uint32_t, non_atomic> m(10000);
 362   for (int i=0; i<10000; ++i) {
 363     m.emplace(i,i);
 364   }
 365
 366   for (int i=0; i<10000; ++i) {
 367     auto a = m.find(i);
 368     folly::doNotOptimizeAway(&*a);
 369   }
 370 }
 371
 372 BENCHMARK(atomic_fast_map_64) {
 373   UIM<long, long, uint64_t, std::atomic> m(10000);
 374   for (int i=0; i<10000; ++i) {
 375     m.emplace(i,i);
 376   }
 377
 378   for (int i=0; i<10000; ++i) {
 379     auto a = m.find(i);
 380     folly::doNotOptimizeAway(&*a);
 381   }
 382 }
 383
 384 BENCHMARK(fast_map_64) {
 385   UIM<long, long, uint64_t, non_atomic> m(10000);
 386   for (int i=0; i<10000; ++i) {
 387     m.emplace(i,i);
 388   }
 389
 390   for (int i=0; i<10000; ++i) {
 391     auto a = m.find(i);
 392     folly::doNotOptimizeAway(&*a);
 393   }
 394 }
 395
 396
 397 int main(int argc, char ** argv) {
 398   testing::InitGoogleTest(&argc, argv);
 399   gflags::ParseCommandLineFlags(&argc, &argv, true);
 400   int rv = RUN_ALL_TESTS();
 401   folly::runBenchmarksOnFlag();
 402   return rv;
 403 }