/*
 * Copyright 2016 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/AtomicUnorderedMap.h>

#include <semaphore.h>
#include <thread>
#include <unordered_map>

#include <gtest/gtest.h>

#include <folly/Benchmark.h>
#include <folly/portability/GFlags.h>
#include <folly/test/DeterministicSchedule.h>

using namespace folly;
using namespace folly::test;

// Single-threaded stand-in for std::atomic<T>: same interface, but plain
// loads and stores.  Memory-order arguments are accepted and ignored.
template <typename T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired) : value(desired) {}

  T operator+=(T arg) { value += arg; return load(); }

  T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */ operator T() const { return load(); }

  void store(T desired,
             std::memory_order /* order */ = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(T desired,
             std::memory_order /* order */ = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  bool compare_exchange_weak(
      T& expected, T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) { value = desired; return true; }
    expected = value;
    return false;
  }

  bool compare_exchange_strong(
      T& expected, T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) { value = desired; return true; }
    expected = value;
    return false;
  }

  bool is_lock_free() const { return true; }
};

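// Illustrative only (not part of the original suite): a quick single-threaded
// sanity check that non_atomic mirrors the std::atomic interface it stands
// in for.
TEST(NonAtomic, interface_sketch) {
  non_atomic<int> a{1};
  EXPECT_EQ(a.load(), 1);
  a.store(2);
  EXPECT_EQ(a.exchange(3), 2);
  int expected = 3;
  EXPECT_TRUE(a.compare_exchange_strong(expected, 4));
  a += 6;
  EXPECT_EQ(static_cast<int>(a), 10);
}
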
template <typename Key, typename Value, typename IndexType,
          template <typename> class Atom = std::atomic,
          typename Allocator = std::allocator<char>>
using UIM =
    AtomicUnorderedInsertMap<Key, Value, std::hash<Key>, std::equal_to<Key>,
                             (boost::has_trivial_destructor<Key>::value &&
                              boost::has_trivial_destructor<Value>::value),
                             Atom, IndexType, Allocator>;
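// For example (illustrative only, mirroring the instantiations used in the
// tests and benchmarks below):
//
//   UIM<long, long, uint32_t> concurrent(100);         // std::atomic links
//   UIM<long, long, uint32_t, non_atomic> local(100);  // plain links
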
template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};

// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);

TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
  AtomicUnorderedInsertMap<std::string,
                           std::string,
                           std::hash<std::string>,
                           std::equal_to<std::string>,
                           false, // std::string's destructor is non-trivial
                           std::atomic,
                           TypeParam,
                           folly::detail::MMapAlloc> m(100);

  m.emplace("abc", "ABC");
  EXPECT_TRUE(m.find("abc") != m.cend());
  EXPECT_EQ(m.find("abc")->first, "abc");
  EXPECT_EQ(m.find("abc")->second, "ABC");
  EXPECT_TRUE(m.find("def") == m.cend());
  auto iter = m.cbegin();
  EXPECT_TRUE(iter != m.cend());
  EXPECT_TRUE(iter == m.find("abc"));
  auto a = iter;
  EXPECT_TRUE(a == iter);
  ++iter;
  EXPECT_TRUE(iter == m.cend());
  EXPECT_TRUE(a != iter);
  a++;
  EXPECT_TRUE(a == iter);
}

TEST(AtomicUnorderedInsertMap, load_factor) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);

  // we should be able to put in much more than 5000 things because of
  // our load factor request
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, true);
  }
}

TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);

  EXPECT_THROW({
    for (int i = 0; i < 6000; ++i) {
      m.emplace(i, false);
    }
  }, std::bad_alloc);
}

TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
  UIM<int, MutableAtom<int>, TypeParam> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
}

TEST(UnorderedInsertMap, value_mutation) {
  UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
  EXPECT_EQ(m.find(1)->second.data, 2);
}

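// Illustrative only (not part of the original suite): with MutableAtom the
// .data member is an Atom<int>, so concurrent increments from multiple
// threads must not lose updates.
TEST(AtomicUnorderedInsertMap, concurrent_value_mutation_sketch) {
  UIM<int, MutableAtom<int>, uint32_t> m(10);
  m.emplace(0, 0);
  std::vector<std::thread> threads;
  for (int t = 0; t < 4; ++t) {
    threads.emplace_back([&] {
      for (int i = 0; i < 1000; ++i) {
        m.find(0)->second.data++;
      }
    });
  }
  for (auto& thr : threads) {
    thr.join();
  }
  EXPECT_EQ(m.find(0)->second.data.load(), 4000);
}
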
// This test is too expensive to run automatically.  On my dev server it
// takes about 10 minutes for a dbg build, 2 for opt.
TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
  size_t capacity = 2000000000;
  AtomicUnorderedInsertMap64<size_t, size_t> big(capacity);
  for (size_t i = 0; i < capacity * 2; i += 2) {
    big.emplace(i, i * 10);
  }
  for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
    auto iter = big.find(i);
    if ((i & 1) == 0 && i < capacity * 2) {
      EXPECT_EQ(iter->second, i * 10);
    } else {
      EXPECT_TRUE(iter == big.cend());
    }
  }
}

BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int, size_t>> ptr = {};

  size_t capacity = 100000;

  // setup is excluded from the timed region
  BENCHMARK_SUSPEND {
    ptr.reset(new AtomicUnorderedInsertMap<int, size_t>(capacity));
    for (size_t i = 0; i < capacity; ++i) {
      auto k = 3 * ((5641 * i) % capacity);
      ptr->emplace(k, k + 1);
      EXPECT_EQ(ptr->find(k)->second, k + 1);
    }
  }

  for (size_t i = 0; i < iters; ++i) {
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() || iter->second != k + 1) {
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    }
    EXPECT_EQ(iter->second, k + 1);
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}

struct PairHash {
  size_t operator()(const std::pair<uint64_t, uint64_t>& pr) const {
    return pr.first ^ pr.second;
  }
};
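// A worked example (added for illustration): PairHash{}({3, 5}) == 3 ^ 5
// == 6.  The xor is deliberately cheap; any pair with equal halves collides
// at 0, which is acceptable for the random keys used below.
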
void contendedRW(size_t itersPerThread,
                 size_t capacity,
                 size_t numThreads,
                 size_t readsPerWrite) {
  typedef std::pair<uint64_t, uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key, MutableAtom<uint32_t>, PairHash> Map;

  std::unique_ptr<Map> ptr = {};
  std::atomic<bool> go{false};
  std::vector<std::thread> threads;

  BENCHMARK_SUSPEND {
    ptr.reset(new Map(capacity));
    while (threads.size() < numThreads) {
      threads.emplace_back([&]() {
        while (!go) {
          std::this_thread::yield();
        }

        size_t reads = 0;
        size_t writes = 0;
        while (reads + writes < itersPerThread) {
          auto r = Random::rand32();
          Key key(reads + writes, r);
          if (reads < writes * readsPerWrite ||
              writes >= capacity / numThreads) {
            ++reads;
            auto iter = ptr->find(key);
            EXPECT_TRUE(
                iter == ptr->cend() ||
                iter->second.data.load(std::memory_order_acquire) >=
                    key.first);
          } else {
            ++writes;
            try {
              auto pr = ptr->emplace(key, key.first);
              if (!pr.second) {
                pr.first->second.data++;
              }
            } catch (std::bad_alloc&) {
              LOG(INFO) << "bad alloc";
            }
          }
        }
      });
    }
  }

  go = true;
  for (auto& thr : threads) {
    thr.join();
  }
}

// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
//
// without MAP_HUGETLB (default)
//
// ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpp  time/iter  iters/s
// ============================================================================
// lookup_int_int_hit                                            20.05ns   49.89M
// contendedRW(small_32thr_99pct)                                70.36ns   14.21M
// contendedRW(large_32thr_99pct)                               164.23ns    6.09M
// contendedRW(large_32thr_99_9pct)                             158.81ns    6.30M
// ============================================================================
//
// with MAP_HUGETLB hacked in
// ============================================================================
// lookup_int_int_hit                                            19.67ns   50.84M
// contendedRW(small_32thr_99pct)                                62.46ns   16.01M
// contendedRW(large_32thr_99pct)                               119.41ns    8.37M
// contendedRW(large_32thr_99_9pct)                             111.23ns    8.99M
// ============================================================================
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)

BENCHMARK_DRAW_LINE();

// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
//
// Single-threaded benchmarks to measure how much faster we are than
// std::unordered_map, and what using atomic operations costs in the
// uncontended case.
// ============================================================================
// std_map                                                        1.20ms   832.58
// atomic_fast_map                                              511.35us    1.96K
// fast_map                                                     196.28us    5.09K
// ============================================================================

BENCHMARK(std_map) {
  std::unordered_map<long, long> m;
  m.reserve(10000);
  for (int i = 0; i < 10000; ++i) {
    m[i] = i;
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map) {
  UIM<long, long, uint32_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map) {
  UIM<long, long, uint32_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map_64) {
  UIM<long, long, uint64_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map_64) {
  UIM<long, long, uint64_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  google::ParseCommandLineFlags(&argc, &argv, true);
  int rv = RUN_ALL_TESTS();
  folly::runBenchmarksOnFlag();
  return rv;
}