/*
 * Copyright 2015 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <folly/AtomicUnorderedMap.h>
#include <folly/test/DeterministicSchedule.h>

#include <semaphore.h>

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include <folly/Benchmark.h>
#include <folly/Random.h>

#include <atomic>
#include <memory>
#include <thread>
#include <unordered_map>
#include <vector>
using namespace folly;
using namespace folly::test;
// Minimal stand-in for std::atomic<T>: same interface, but plain
// non-atomic loads and stores, so benchmarks can measure what the
// atomic operations themselves cost.
template <typename T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired): value(desired) {}

  T operator+=(T arg) { value += arg; return load(); }

  T load(std::memory_order order = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */ operator T() const { return load(); }

  void store(T desired, std::memory_order order = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  bool compare_exchange_weak(
      T& expected, T desired,
      std::memory_order success = std::memory_order_seq_cst,
      std::memory_order failure = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool compare_exchange_strong(
      T& expected, T desired,
      std::memory_order success = std::memory_order_seq_cst,
      std::memory_order failure = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool is_lock_free() const { return true; }
};
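
// A minimal smoke test (illustrative sketch added here, not one of the
// original tests): non_atomic<T> mirrors just enough of std::atomic<T>'s
// interface to be swapped in as the map's Atom parameter, so we can
// exercise the CAS semantics it fakes.
TEST(NonAtomic, interface_smoke) {
  non_atomic<int> v{1};
  int expected = 1;
  EXPECT_TRUE(v.compare_exchange_strong(expected, 2)); // matches, swaps
  EXPECT_EQ(v.load(), 2);
  expected = 1;
  EXPECT_FALSE(v.compare_exchange_strong(expected, 3)); // mismatch
  EXPECT_EQ(expected, 2); // failed CAS writes back the current value
  EXPECT_EQ(v.exchange(5), 2);
  v += 4;
  EXPECT_EQ(static_cast<int>(v), 9);
}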
template <typename Key,
          typename Value,
          typename IndexType,
          template <typename> class Atom = std::atomic,
          typename Allocator = std::allocator<char>>
using UIM =
    AtomicUnorderedInsertMap<Key,
                             Value,
                             std::hash<Key>,
                             std::equal_to<Key>,
                             (boost::has_trivial_destructor<Key>::value &&
                              boost::has_trivial_destructor<Value>::value),
                             Atom,
                             IndexType,
                             Allocator>;
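
// Orientation note (added): the alias pins the hash, key-equality, and
// trivial-destruction parameters so the tests below only spell out the
// interesting ones. For example, UIM<long, long, uint32_t, non_atomic>
// is an insert-only map with 32-bit internal indices and plain
// (non-atomic) slots.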
template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
  UIM<std::string,
      std::string,
      TypeParam,
      std::atomic,
      folly::detail::MMapAlloc> m(100);

  m.emplace("abc", "ABC");
  EXPECT_TRUE(m.find("abc") != m.cend());
  EXPECT_EQ(m.find("abc")->first, "abc");
  EXPECT_EQ(m.find("abc")->second, "ABC");
  EXPECT_TRUE(m.find("def") == m.cend());
  auto iter = m.cbegin();
  EXPECT_TRUE(iter != m.cend());
  EXPECT_TRUE(iter == m.find("abc"));
  auto a = iter;
  EXPECT_TRUE(a == iter);
  ++iter;
  EXPECT_TRUE(iter == m.cend());
  EXPECT_TRUE(a != iter);
  a++;
  EXPECT_TRUE(a == iter);
}
TEST(AtomicUnorderedInsertMap, load_factor) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);

  // we should be able to put in much more than 5000 things because of
  // our load factor request (0.5 reserves roughly twice as many slots)
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, true);
  }
}
TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);

  EXPECT_THROW({
    for (int i = 0; i < 6000; ++i) {
      m.emplace(i, false);
    }
  }, std::bad_alloc);
}
TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
  UIM<int, MutableAtom<int>, TypeParam> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
}
TEST(UnorderedInsertMap, value_mutation) {
  UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
  EXPECT_EQ(m.find(1)->second.data, 2);
}
// This test is too expensive to run automatically. On my dev server it
// takes about 10 minutes for a dbg build, 2 for opt.
TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
  size_t capacity = 2000000000;
  AtomicUnorderedInsertMap64<size_t, size_t> big(capacity);
  // insert the even keys
  for (size_t i = 0; i < capacity * 2; i += 2) {
    big.emplace(i, i * 10);
  }
  // probe a spread of present and absent keys
  for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
    auto iter = big.find(i);
    if ((i & 1) == 0 && i < capacity * 2) {
      EXPECT_EQ(iter->second, i * 10);
    } else {
      EXPECT_TRUE(iter == big.cend());
    }
  }
}
BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int, size_t>> ptr = {};

  size_t capacity = 100000;

  BENCHMARK_SUSPEND {
    ptr.reset(new AtomicUnorderedInsertMap<int, size_t>(capacity));
    // 5641 is coprime with the capacity, so this fills every key 3*j for
    // j in [0, capacity); every timed lookup below is therefore a hit
    for (size_t i = 0; i < capacity; ++i) {
      auto k = 3 * ((5641 * i) % capacity);
      ptr->emplace(k, k + 1);
      EXPECT_EQ(ptr->find(k)->second, k + 1);
    }
  }

  for (size_t i = 0; i < iters; ++i) {
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() ||
        iter->second != k + 1) {
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    }
    EXPECT_EQ(iter->second, k + 1);
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}
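
// Note (added): BENCHMARK_SUSPEND pauses the benchmark clock, so the map
// construction, prefill, and teardown above are excluded; only the find()
// loop contributes to the reported time/iter.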
struct PairHash {
  size_t operator()(const std::pair<uint64_t, uint64_t>& pr) const {
    return pr.first ^ pr.second;
  }
};
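
// Mixed reader/writer stress. Each thread interleaves finds and emplaces,
// holding the mix near readsPerWrite reads per write and capping each
// thread's inserts at capacity / numThreads so emplace doesn't overflow
// the fixed-capacity map.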
void contendedRW(size_t itersPerThread,
                 size_t capacity,
                 size_t numThreads,
                 size_t readsPerWrite) {
  typedef std::pair<uint64_t, uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key, MutableAtom<uint32_t>, PairHash> Map;

  std::unique_ptr<Map> ptr = {};
  std::atomic<bool> go{false}; // threads spin on this start flag
  std::vector<std::thread> threads;

  BENCHMARK_SUSPEND {
    ptr.reset(new Map(capacity));
    while (threads.size() < numThreads) {
      threads.emplace_back([&]() {
        while (!go) {
          std::this_thread::yield();
        }

        size_t reads = 0;
        size_t writes = 0;
        while (reads + writes < itersPerThread) {
          auto r = Random::rand32();
          Key key(reads + writes, r);
          if (reads < writes * readsPerWrite ||
              writes >= capacity / numThreads) {
            // read
            ++reads;
            auto iter = ptr->find(key);
            EXPECT_TRUE(
                iter == ptr->cend() ||
                iter->second.data.load(std::memory_order_acquire) >= key.first);
          } else {
            // write
            ++writes;
            try {
              auto pr = ptr->emplace(key, key.first);
              if (!pr.second) {
                pr.first->second.data++;
              }
            } catch (std::bad_alloc& x) {
              LOG(INFO) << "bad alloc";
            }
          }
        }
      });
    }
  }

  go = true;

  for (auto& thr : threads) {
    thr.join();
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}
// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
//
// without MAP_HUGETLB (default)
//
// ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpp  time/iter  iters/s
// ============================================================================
// lookup_int_int_hit                                            20.05ns   49.89M
// contendedRW(small_32thr_99pct)                                70.36ns   14.21M
// contendedRW(large_32thr_99pct)                               164.23ns    6.09M
// contendedRW(large_32thr_99_9pct)                             158.81ns    6.30M
// ============================================================================
//
// with MAP_HUGETLB hacked in
// ============================================================================
// lookup_int_int_hit                                            19.67ns   50.84M
// contendedRW(small_32thr_99pct)                                62.46ns   16.01M
// contendedRW(large_32thr_99pct)                               119.41ns    8.37M
// contendedRW(large_32thr_99_9pct)                             111.23ns    8.99M
// ============================================================================
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)
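
// Note (added): folly's BENCHMARK_NAMED_PARAM forwards the iteration count
// as the first argument, so the literals above are capacity, numThreads,
// and readsPerWrite; e.g. large_32thr_99_9pct runs a 100M-capacity map
// with 32 threads and 999 reads per write.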
BENCHMARK_DRAW_LINE();
// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
//
// Single-threaded benchmarks to test how much better we are than
// std::unordered_map and what the cost of using atomic operations is
// in the uncontended use case
// ============================================================================
// std_map                                                       1.20ms   832.58
// atomic_fast_map                                             511.35us    1.96K
// fast_map                                                    196.28us    5.09K
// ============================================================================
BENCHMARK(std_map) {
  std::unordered_map<long, long> m;
  m.reserve(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}
BENCHMARK(atomic_fast_map) {
  UIM<long, long, uint32_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}
BENCHMARK(fast_map) {
  UIM<long, long, uint32_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}
BENCHMARK(atomic_fast_map_64) {
  UIM<long, long, uint64_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}
BENCHMARK(fast_map_64) {
  UIM<long, long, uint64_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  google::ParseCommandLineFlags(&argc, &argv, true);
  int rv = RUN_ALL_TESTS();
  folly::runBenchmarksOnFlag();
  return rv;
}