folly/test/AtomicHashMapTest.cpp

   1 /*
   2  * Copyright 2015 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <folly/AtomicHashMap.h>
  18
  19 #include <glog/logging.h>
  20 #include <gtest/gtest.h>
  21 #include <sys/time.h>
  22 #include <thread>
  23 #include <atomic>
  24 #include <memory>
  25 #include <folly/Benchmark.h>
  26 #include <folly/Conv.h>
  27
  28 using std::vector;
  29 using std::string;
  30 using folly::AtomicHashMap;
  31 using folly::AtomicHashArray;
  32
  33 // Tunables:
  34 DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
  35 DEFINE_double(maxLoadFactor, 0.80, "Max before growth.");
  36 DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests.");
  37 DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks.");
  38
  39 const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor;
  40 const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
  41
  42 static int64_t nowInUsec() {
  43   timeval tv;
  44   gettimeofday(&tv, 0);
  45   return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
  46 }
  47
  48 TEST(Ahm, BasicStrings) {
  49   typedef AtomicHashMap<int64_t,string> AHM;
  50   AHM myMap(1024);
  51   EXPECT_TRUE(myMap.begin() == myMap.end());
  52
  53   for (int i = 0; i < 100; ++i) {
  54     myMap.insert(make_pair(i, folly::to<string>(i)));
  55   }
  56   for (int i = 0; i < 100; ++i) {
  57     EXPECT_EQ(myMap.find(i)->second, folly::to<string>(i));
  58   }
  59
  60   myMap.insert(std::make_pair(999, "A"));
  61   myMap.insert(std::make_pair(999, "B"));
  62   EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten
  63   myMap.find(999)->second = "B";
  64   myMap.find(999)->second = "C";
  65   EXPECT_EQ(myMap.find(999)->second, "C");
  66   EXPECT_EQ(myMap.find(999)->first, 999);
  67 }
  68
  69
  70 TEST(Ahm, BasicNoncopyable) {
  71   typedef AtomicHashMap<int64_t,std::unique_ptr<int>> AHM;
  72   AHM myMap(1024);
  73   EXPECT_TRUE(myMap.begin() == myMap.end());
  74
  75   for (int i = 0; i < 50; ++i) {
  76     myMap.insert(make_pair(i, std::unique_ptr<int>(new int(i))));
  77   }
  78   for (int i = 50; i < 100; ++i) {
  79     myMap.insert(i, std::unique_ptr<int>(new int (i)));
  80   }
  81   for (int i = 100; i < 150; ++i) {
  82     myMap.emplace(i, new int (i));
  83   }
  84   for (int i = 150; i < 200; ++i) {
  85     myMap.emplace(i, new int (i), std::default_delete<int>());
  86   }
  87   for (int i = 0; i < 200; ++i) {
  88     EXPECT_EQ(*(myMap.find(i)->second), i);
  89   }
  90   for (int i = 0; i < 200; i+=4) {
  91     myMap.erase(i);
  92   }
  93   for (int i = 0; i < 200; i+=4) {
  94     EXPECT_EQ(myMap.find(i), myMap.end());
  95   }
  96 }
  97
  98 typedef int32_t     KeyT;
  99 typedef int32_t     ValueT;
 100
 101 typedef AtomicHashMap<KeyT,ValueT> AHMapT;
 102 typedef AHMapT::value_type RecordT;
 103 typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
 104
 105 AHArrayT::Config config;
 106 static AHArrayT::SmartPtr globalAHA(nullptr);
 107 static std::unique_ptr<AHMapT> globalAHM;
 108
 109 // Generate a deterministic value based on an input key
 110 static int genVal(int key) {
 111   return key / 3;
 112 }
 113
 114 TEST(Ahm, grow) {
 115   VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
 116     sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
 117   uint64_t numEntries = 10000;
 118   float sizeFactor = 0.46;
 119
 120   std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
 121
 122   // load map - make sure we succeed and the index is accurate
 123   bool success = true;
 124   for (uint64_t i = 0; i < numEntries; i++) {
 125     auto ret = m->insert(RecordT(i, genVal(i)));
 126     success &= ret.second;
 127     success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
 128   }
 129   // Overwrite vals to make sure there are no dups
 130   // Every insert should fail because the keys are already in the map.
 131   success = true;
 132   for (uint64_t i = 0; i < numEntries; i++) {
 133     auto ret = m->insert(RecordT(i, genVal(i * 2)));
 134     success &= (ret.second == false);  // fail on collision
 135     success &= (ret.first->second == genVal(i)); // return the previous value
 136     success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
 137   }
 138   EXPECT_TRUE(success);
 139
 140   // check correctness
 141   size_t cap = m->capacity();
 142   ValueT val;
 143   EXPECT_GT(m->numSubMaps(), 1);  // make sure we grew
 144   success = true;
 145   EXPECT_EQ(m->size(), numEntries);
 146   for (size_t i = 0; i < numEntries; i++) {
 147     success &= (m->find(i)->second == genVal(i));
 148   }
 149   EXPECT_TRUE(success);
 150
 151   // Check findAt
 152   success = true;
 153   KeyT key(0);
 154   AHMapT::const_iterator retIt;
 155   for (int32_t i = 0; i < int32_t(numEntries); i++) {
 156     retIt = m->find(i);
 157     retIt = m->findAt(retIt.getIndex());
 158     success &= (retIt->second == genVal(i));
 159     // We use a uint32_t index so that this comparison is between two
 160     // variables of the same type.
 161     success &= (retIt->first == i);
 162   }
 163   EXPECT_TRUE(success);
 164
 165   // Try modifying value
 166   m->find(8)->second = 5309;
 167   EXPECT_EQ(m->find(8)->second, 5309);
 168
 169   // check clear()
 170   m->clear();
 171   success = true;
 172   for (uint64_t i = 0; i < numEntries / 2; i++) {
 173     success &= m->insert(RecordT(i, genVal(i))).second;
 174   }
 175   EXPECT_TRUE(success);
 176   EXPECT_EQ(m->size(), numEntries / 2);
 177 }
 178
 179 TEST(Ahm, iterator) {
 180   int numEntries = 10000;
 181   float sizeFactor = .46;
 182   std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
 183
 184   // load map - make sure we succeed and the index is accurate
 185   for (int i = 0; i < numEntries; i++) {
 186     m->insert(RecordT(i, genVal(i)));
 187   }
 188
 189   bool success = true;
 190   int count = 0;
 191   FOR_EACH(it, *m) {
 192     success &= (it->second == genVal(it->first));
 193     ++count;
 194   }
 195   EXPECT_TRUE(success);
 196   EXPECT_EQ(count, numEntries);
 197 }
 198
 199 class Counters {
 200 private:
 201   // Note: Unfortunately can't currently put a std::atomic<int64_t> in
 202   // the value in ahm since it doesn't support types that are both non-copy
 203   // and non-move constructible yet.
 204   AtomicHashMap<int64_t,int64_t> ahm;
 205
 206 public:
 207   explicit Counters(size_t numCounters) : ahm(numCounters) {}
 208
 209   void increment(int64_t obj_id) {
 210     auto ret = ahm.insert(std::make_pair(obj_id, 1));
 211     if (!ret.second) {
 212       // obj_id already exists, increment count
 213       __sync_fetch_and_add(&ret.first->second, 1);
 214     }
 215   }
 216
 217   int64_t getValue(int64_t obj_id) {
 218     auto ret = ahm.find(obj_id);
 219     return ret != ahm.end() ? ret->second : 0;
 220   }
 221
 222   // export the counters without blocking increments
 223   string toString() {
 224     string ret = "{\n";
 225     ret.reserve(ahm.size() * 32);
 226     for (const auto& e : ahm) {
 227       ret += folly::to<string>(
 228         "  [", e.first, ":", e.second, "]\n");
 229     }
 230     ret += "}\n";
 231     return ret;
 232   }
 233 };
 234
 235 // If you get an error "terminate called without an active exception", there
 236 // might be too many threads getting created - decrease numKeys and/or mult.
 237 TEST(Ahm, counter) {
 238   const int numKeys = 10;
 239   const int mult = 10;
 240   Counters c(numKeys);
 241   vector<int64_t> keys;
 242   FOR_EACH_RANGE(i, 1, numKeys) {
 243     keys.push_back(i);
 244   }
 245   vector<std::thread> threads;
 246   for (auto key : keys) {
 247     FOR_EACH_RANGE(i, 0, key * mult) {
 248       threads.push_back(std::thread([&, key] { c.increment(key); }));
 249     }
 250   }
 251   for (auto& t : threads) {
 252     t.join();
 253   }
 254   string str = c.toString();
 255   for (auto key : keys) {
 256     int val = key * mult;
 257     EXPECT_EQ(val, c.getValue(key));
 258     EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
 259   }
 260 }
 261
 262 class Integer {
 263
 264  public:
 265   explicit Integer(KeyT v = 0) : v_(v) {}
 266
 267   Integer& operator=(const Integer& a) {
 268     static bool throwException_ = false;
 269     throwException_ = !throwException_;
 270     if (throwException_) {
 271       throw 1;
 272     }
 273     v_ = a.v_;
 274     return *this;
 275   }
 276
 277   bool operator==(const Integer& a) const { return v_ == a.v_; }
 278
 279  private:
 280   KeyT v_;
 281 };
 282
 283 TEST(Ahm, map_exception_safety) {
 284   typedef AtomicHashMap<KeyT,Integer> MyMapT;
 285
 286   int numEntries = 10000;
 287   float sizeFactor = 0.46;
 288   std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
 289
 290   bool success = true;
 291   int count = 0;
 292   for (int i = 0; i < numEntries; i++) {
 293     try {
 294       m->insert(i, Integer(genVal(i)));
 295       success &= (m->find(i)->second == Integer(genVal(i)));
 296       ++count;
 297     } catch (...) {
 298       success &= !m->count(i);
 299     }
 300   }
 301   EXPECT_EQ(count, m->size());
 302   EXPECT_TRUE(success);
 303 }
 304
 305 TEST(Ahm, basicErase) {
 306   size_t numEntries = 3000;
 307
 308   std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
 309   // Iterate filling up the map and deleting all keys a few times
 310   // to test more than one subMap.
 311   for (int iterations = 0; iterations < 4; ++iterations) {
 312     // Testing insertion of keys
 313     bool success = true;
 314     for (size_t i = 0; i < numEntries; ++i) {
 315       success &= !(s->count(i));
 316       auto ret = s->insert(RecordT(i, i));
 317       success &= s->count(i);
 318       success &= ret.second;
 319     }
 320     EXPECT_TRUE(success);
 321     EXPECT_EQ(s->size(), numEntries);
 322
 323     // Delete every key in the map and verify that the key is gone and the the
 324     // size is correct.
 325     success = true;
 326     for (size_t i = 0; i < numEntries; ++i) {
 327       success &= s->erase(i);
 328       success &= (s->size() == numEntries - 1 - i);
 329       success &= !(s->count(i));
 330       success &= !(s->erase(i));
 331     }
 332     EXPECT_TRUE(success);
 333   }
 334   VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
 335 }
 336
 337 namespace {
 338
 339 inline KeyT randomizeKey(int key) {
 340   // We deterministically randomize the key to more accurately simulate
 341   // real-world usage, and to avoid pathalogical performance patterns (e.g.
 342   // those related to __gnu_cxx::hash<int64_t>()(1) == 1).
 343   //
 344   // Use a hash function we don't normally use for ints to avoid interactions.
 345   return folly::hash::jenkins_rev_mix32(key);
 346 }
 347
 348 int numOpsPerThread = 0;
 349
 350 void* insertThread(void* jj) {
 351   int64_t j = (int64_t) jj;
 352   for (int i = 0; i < numOpsPerThread; ++i) {
 353     KeyT key = randomizeKey(i + j * numOpsPerThread);
 354     globalAHM->insert(key, genVal(key));
 355   }
 356   return nullptr;
 357 }
 358
 359 void* insertThreadArr(void* jj) {
 360   int64_t j = (int64_t) jj;
 361   for (int i = 0; i < numOpsPerThread; ++i) {
 362     KeyT key = randomizeKey(i + j * numOpsPerThread);
 363     globalAHA->insert(std::make_pair(key, genVal(key)));
 364   }
 365   return nullptr;
 366 }
 367
 368 std::atomic<bool> runThreadsCreatedAllThreads;
 369 void runThreads(void *(*thread)(void*), int numThreads, void **statuses) {
 370   folly::BenchmarkSuspender susp;
 371   runThreadsCreatedAllThreads.store(false);
 372   vector<pthread_t> threadIds;
 373   for (int64_t j = 0; j < numThreads; j++) {
 374     pthread_t tid;
 375     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 376        LOG(ERROR) << "Could not start thread";
 377     } else {
 378       threadIds.push_back(tid);
 379     }
 380   }
 381   susp.dismiss();
 382
 383   runThreadsCreatedAllThreads.store(true);
 384   for (size_t i = 0; i < threadIds.size(); ++i) {
 385     pthread_join(threadIds[i], statuses == nullptr ? nullptr : &statuses[i]);
 386   }
 387 }
 388
 389 void runThreads(void *(*thread)(void*)) {
 390   runThreads(thread, FLAGS_numThreads, nullptr);
 391 }
 392
 393 }
 394
 395 TEST(Ahm, collision_test) {
 396   const int numInserts = 1000000 / 4;
 397
 398   // Doing the same number on each thread so we collide.
 399   numOpsPerThread = numInserts;
 400
 401   float sizeFactor = 0.46;
 402   int entrySize = sizeof(KeyT) + sizeof(ValueT);
 403   VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
 404     " Byte entries replicated in " << FLAGS_numThreads <<
 405     " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor.";
 406
 407   globalAHM.reset(new AHMapT(int(numInserts * sizeFactor), config));
 408
 409   size_t sizeInit = globalAHM->capacity();
 410   VLOG(1) << "  Initial capacity: " << sizeInit;
 411
 412   double start = nowInUsec();
 413   runThreads([](void*) -> void* { // collisionInsertThread
 414     for (int i = 0; i < numOpsPerThread; ++i) {
 415       KeyT key = randomizeKey(i);
 416       globalAHM->insert(key, genVal(key));
 417     }
 418     return nullptr;
 419   });
 420   double elapsed = nowInUsec() - start;
 421
 422   size_t finalCap = globalAHM->capacity();
 423   size_t sizeAHM = globalAHM->size();
 424   VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
 425     " duplicate inserts (atomic).";
 426   VLOG(1) << "  Final capacity: " << finalCap << " in " <<
 427     globalAHM->numSubMaps() << " sub maps (" <<
 428     sizeAHM * 100 / finalCap << "% load factor, " <<
 429     (finalCap - sizeInit) * 100 / sizeInit << "% growth).";
 430
 431   // check correctness
 432   EXPECT_EQ(sizeAHM, numInserts);
 433   bool success = true;
 434   ValueT val;
 435   for (int i = 0; i < numInserts; ++i) {
 436     KeyT key = randomizeKey(i);
 437     success &= (globalAHM->find(key)->second == genVal(key));
 438   }
 439   EXPECT_TRUE(success);
 440
 441   // check colliding finds
 442   start = nowInUsec();
 443   runThreads([](void*) -> void* { // collisionFindThread
 444     KeyT key(0);
 445     for (int i = 0; i < numOpsPerThread; ++i) {
 446       globalAHM->find(key);
 447     }
 448     return nullptr;
 449   });
 450
 451   elapsed = nowInUsec() - start;
 452
 453   VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
 454     " duplicate finds (atomic).";
 455 }
 456
 457 namespace {
 458
 459 const int kInsertPerThread = 100000;
 460 int raceFinalSizeEstimate;
 461
 462 void* raceIterateThread(void* jj) {
 463   int64_t j = (int64_t) jj;
 464   int count = 0;
 465
 466   AHMapT::iterator it = globalAHM->begin();
 467   AHMapT::iterator end = globalAHM->end();
 468   for (; it != end; ++it) {
 469     ++count;
 470     if (count > raceFinalSizeEstimate) {
 471       EXPECT_FALSE("Infinite loop in iterator.");
 472       return nullptr;
 473     }
 474   }
 475   return nullptr;
 476 }
 477
 478 void* raceInsertRandomThread(void* jj) {
 479   int64_t j = (int64_t) jj;
 480   for (int i = 0; i < kInsertPerThread; ++i) {
 481     KeyT key = rand();
 482     globalAHM->insert(key, genVal(key));
 483   }
 484   return nullptr;
 485 }
 486
 487 }
 488
 489 // Test for race conditions when inserting and iterating at the same time and
 490 // creating multiple submaps.
 491 TEST(Ahm, race_insert_iterate_thread_test) {
 492   const int kInsertThreads = 20;
 493   const int kIterateThreads = 20;
 494   raceFinalSizeEstimate = kInsertThreads * kInsertPerThread;
 495
 496   VLOG(1) << "Testing iteration and insertion with " << kInsertThreads
 497     << " threads inserting and " << kIterateThreads << " threads iterating.";
 498
 499   globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config));
 500
 501   vector<pthread_t> threadIds;
 502   for (int64_t j = 0; j < kInsertThreads + kIterateThreads; j++) {
 503     pthread_t tid;
 504     void *(*thread)(void*) =
 505       (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
 506     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 507       LOG(ERROR) << "Could not start thread";
 508     } else {
 509       threadIds.push_back(tid);
 510     }
 511   }
 512   for (size_t i = 0; i < threadIds.size(); ++i) {
 513     pthread_join(threadIds[i], nullptr);
 514   }
 515   VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
 516   VLOG(1) << "Final size of map " << globalAHM->size();
 517 }
 518
 519 namespace {
 520
 521 const int kTestEraseInsertions = 200000;
 522 std::atomic<int32_t> insertedLevel;
 523
 524 void* testEraseInsertThread(void*) {
 525   for (int i = 0; i < kTestEraseInsertions; ++i) {
 526     KeyT key = randomizeKey(i);
 527     globalAHM->insert(key, genVal(key));
 528     insertedLevel.store(i, std::memory_order_release);
 529   }
 530   insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
 531   return nullptr;
 532 }
 533
 534 void* testEraseEraseThread(void*) {
 535   for (int i = 0; i < kTestEraseInsertions; ++i) {
 536     /*
 537      * Make sure that we don't get ahead of the insert thread, because
 538      * part of the condition for this unit test succeeding is that the
 539      * map ends up empty.
 540      *
 541      * Note, there is a subtle case here when a new submap is
 542      * allocated: the erasing thread might get 0 from count(key)
 543      * because it hasn't seen numSubMaps_ update yet.  To avoid this
 544      * race causing problems for the test (it's ok for real usage), we
 545      * lag behind the inserter by more than just element.
 546      */
 547     const int lag = 10;
 548     int currentLevel;
 549     do {
 550       currentLevel = insertedLevel.load(std::memory_order_acquire);
 551       if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1;
 552     } while (currentLevel - lag < i);
 553
 554     KeyT key = randomizeKey(i);
 555     while (globalAHM->count(key)) {
 556       if (globalAHM->erase(key)) {
 557         break;
 558       }
 559     }
 560   }
 561   return nullptr;
 562 }
 563
 564 }
 565
 566 // Here we have a single thread inserting some values, and several threads
 567 // racing to delete the values in the order they were inserted.
 568 TEST(Ahm, thread_erase_insert_race) {
 569   const int kInsertThreads = 1;
 570   const int kEraseThreads = 10;
 571
 572   VLOG(1) << "Testing insertion and erase with " << kInsertThreads
 573     << " thread inserting and " << kEraseThreads << " threads erasing.";
 574
 575   globalAHM.reset(new AHMapT(kTestEraseInsertions / 4, config));
 576
 577   vector<pthread_t> threadIds;
 578   for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) {
 579     pthread_t tid;
 580     void *(*thread)(void*) =
 581       (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
 582     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 583       LOG(ERROR) << "Could not start thread";
 584     } else {
 585       threadIds.push_back(tid);
 586     }
 587   }
 588   for (size_t i = 0; i < threadIds.size(); i++) {
 589     pthread_join(threadIds[i], nullptr);
 590   }
 591
 592   EXPECT_TRUE(globalAHM->empty());
 593   EXPECT_EQ(globalAHM->size(), 0);
 594
 595   VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
 596 }
 597
 598 // Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value.
 599 typedef AtomicHashArray<int32_t, int32_t> AHA;
 600 AHA::Config configRace;
 601 auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
 602 void* atomicHashArrayInsertRaceThread(void* j) {
 603   AHA* arr = atomicHashArrayInsertRaceArray.get();
 604   uintptr_t numInserted = 0;
 605   while (!runThreadsCreatedAllThreads.load());
 606   for (int i = 0; i < 2; i++) {
 607     if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
 608       numInserted++;
 609     }
 610   }
 611   pthread_exit((void *) numInserted);
 612 }
 613 TEST(Ahm, atomic_hash_array_insert_race) {
 614   AHA* arr = atomicHashArrayInsertRaceArray.get();
 615   int numIterations = 5000, FLAGS_numThreads = 4;
 616   void* statuses[FLAGS_numThreads];
 617   for (int i = 0; i < numIterations; i++) {
 618     arr->clear();
 619     runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses);
 620     EXPECT_GE(arr->size(), 1);
 621     for (int j = 0; j < FLAGS_numThreads; j++) {
 622       EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
 623     }
 624   }
 625 }
 626
 627 // Repro for T#5841499. Race between erase() and find() on the same key.
 628 TEST(Ahm, erase_find_race) {
 629   const uint64_t limit = 10000;
 630   AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
 631   std::atomic<uint64_t> key {1};
 632
 633   // Invariant: all values are equal to their keys.
 634   // At any moment there is one or two consecutive keys in the map.
 635
 636   std::thread write_thread([&]() {
 637     while (true) {
 638       uint64_t k = ++key;
 639       if (k > limit) {
 640         break;
 641       }
 642       map.insert(k + 1, k + 1);
 643       map.erase(k);
 644     }
 645   });
 646
 647   std::thread read_thread([&]() {
 648     while (true) {
 649       uint64_t k = key.load();
 650       if (k > limit) {
 651         break;
 652       }
 653
 654       auto it = map.find(k);
 655       if (it != map.end()) {
 656         ASSERT_EQ(k, it->second);
 657       }
 658     }
 659   });
 660
 661   read_thread.join();
 662   write_thread.join();
 663 }
 664
 665 // Repro for a bug when iterator didn't skip empty submaps.
 666 TEST(Ahm, iterator_skips_empty_submaps) {
 667   AtomicHashMap<uint64_t, uint64_t>::Config config;
 668   config.growthFactor = 1;
 669
 670   AtomicHashMap<uint64_t, uint64_t> map(1, config);
 671
 672   map.insert(1, 1);
 673   map.insert(2, 2);
 674   map.insert(3, 3);
 675
 676   map.erase(2);
 677
 678   auto it = map.find(1);
 679
 680   ASSERT_NE(map.end(), it);
 681   ASSERT_EQ(1, it->first);
 682   ASSERT_EQ(1, it->second);
 683
 684   ++it;
 685
 686   ASSERT_NE(map.end(), it);
 687   ASSERT_EQ(3, it->first);
 688   ASSERT_EQ(3, it->second);
 689
 690   ++it;
 691   ASSERT_EQ(map.end(), it);
 692 }
 693
 694 namespace {
 695
 696 void loadGlobalAha() {
 697   std::cout << "loading global AHA with " << FLAGS_numThreads
 698             << " threads...\n";
 699   uint64_t start = nowInUsec();
 700   globalAHA = AHArrayT::create(maxBMElements, config);
 701   numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
 702   CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) <<
 703     "kNumThreads must evenly divide kNumInserts.";
 704   runThreads(insertThreadArr);
 705   uint64_t elapsed = nowInUsec() - start;
 706   std::cout << "  took " << elapsed / 1000 << " ms (" <<
 707     (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
 708   EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements);
 709 }
 710
 711 void loadGlobalAhm() {
 712   std::cout << "loading global AHM with " << FLAGS_numThreads
 713             << " threads...\n";
 714   uint64_t start = nowInUsec();
 715   globalAHM.reset(new AHMapT(maxBMElements, config));
 716   numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
 717   runThreads(insertThread);
 718   uint64_t elapsed = nowInUsec() - start;
 719   std::cout << "  took " << elapsed / 1000 << " ms (" <<
 720     (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
 721   EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
 722 }
 723
 724 }
 725
 726 BENCHMARK(st_aha_find, iters) {
 727   CHECK_LE(iters, FLAGS_numBMElements);
 728   for (size_t i = 0; i < iters; i++) {
 729     KeyT key = randomizeKey(i);
 730     folly::doNotOptimizeAway(globalAHA->find(key)->second);
 731   }
 732 }
 733
 734 BENCHMARK(st_ahm_find, iters) {
 735   CHECK_LE(iters, FLAGS_numBMElements);
 736   for (size_t i = 0; i < iters; i++) {
 737     KeyT key = randomizeKey(i);
 738     folly::doNotOptimizeAway(globalAHM->find(key)->second);
 739   }
 740 }
 741
 742 BENCHMARK_DRAW_LINE()
 743
 744 BENCHMARK(mt_ahm_miss, iters) {
 745   CHECK_LE(iters, FLAGS_numBMElements);
 746   numOpsPerThread = iters / FLAGS_numThreads;
 747   runThreads([](void* jj) -> void* {
 748     int64_t j = (int64_t) jj;
 749     while (!runThreadsCreatedAllThreads.load());
 750     for (int i = 0; i < numOpsPerThread; ++i) {
 751       KeyT key = i + j * numOpsPerThread * 100;
 752       folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
 753     }
 754     return nullptr;
 755   });
 756 }
 757
 758 BENCHMARK(st_ahm_miss, iters) {
 759   CHECK_LE(iters, FLAGS_numBMElements);
 760   for (size_t i = 0; i < iters; i++) {
 761     KeyT key = randomizeKey(i + iters * 100);
 762     folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
 763   }
 764 }
 765
 766 BENCHMARK(mt_ahm_find_insert_mix, iters) {
 767   CHECK_LE(iters, FLAGS_numBMElements);
 768   numOpsPerThread = iters / FLAGS_numThreads;
 769   runThreads([](void* jj) -> void* {
 770     int64_t j = (int64_t) jj;
 771     while (!runThreadsCreatedAllThreads.load());
 772     for (int i = 0; i < numOpsPerThread; ++i) {
 773       if (i % 128) {  // ~1% insert mix
 774         KeyT key = randomizeKey(i + j * numOpsPerThread);
 775         folly::doNotOptimizeAway(globalAHM->find(key)->second);
 776       } else {
 777         KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
 778         globalAHM->insert(key, genVal(key));
 779       }
 780     }
 781     return nullptr;
 782   });
 783 }
 784
 785 BENCHMARK(mt_aha_find, iters) {
 786   CHECK_LE(iters, FLAGS_numBMElements);
 787   numOpsPerThread = iters / FLAGS_numThreads;
 788   runThreads([](void* jj) -> void* {
 789       int64_t j = (int64_t) jj;
 790       while (!runThreadsCreatedAllThreads.load());
 791       for (int i = 0; i < numOpsPerThread; ++i) {
 792         KeyT key = randomizeKey(i + j * numOpsPerThread);
 793         folly::doNotOptimizeAway(globalAHA->find(key)->second);
 794       }
 795       return nullptr;
 796     });
 797 }
 798
 799 BENCHMARK(mt_ahm_find, iters) {
 800   CHECK_LE(iters, FLAGS_numBMElements);
 801   numOpsPerThread = iters / FLAGS_numThreads;
 802   runThreads([](void* jj) -> void* {
 803     int64_t j = (int64_t) jj;
 804     while (!runThreadsCreatedAllThreads.load());
 805     for (int i = 0; i < numOpsPerThread; ++i) {
 806       KeyT key = randomizeKey(i + j * numOpsPerThread);
 807       folly::doNotOptimizeAway(globalAHM->find(key)->second);
 808     }
 809     return nullptr;
 810   });
 811 }
 812
 813 KeyT k;
 814 BENCHMARK(st_baseline_modulus_and_random, iters) {
 815   for (size_t i = 0; i < iters; ++i) {
 816     k = randomizeKey(i) % iters;
 817   }
 818 }
 819
 820 // insertions go last because they reset the map
 821
 822 BENCHMARK(mt_ahm_insert, iters) {
 823   BENCHMARK_SUSPEND {
 824     globalAHM.reset(new AHMapT(int(iters * LF), config));
 825     numOpsPerThread = iters / FLAGS_numThreads;
 826   }
 827   runThreads(insertThread);
 828 }
 829
 830 BENCHMARK(st_ahm_insert, iters) {
 831   folly::BenchmarkSuspender susp;
 832   std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
 833   susp.dismiss();
 834
 835   for (size_t i = 0; i < iters; i++) {
 836     KeyT key = randomizeKey(i);
 837     ahm->insert(key, genVal(key));
 838   }
 839 }
 840
 841 void benchmarkSetup() {
 842   config.maxLoadFactor = FLAGS_maxLoadFactor;
 843   configRace.maxLoadFactor = 0.5;
 844   int numCores = sysconf(_SC_NPROCESSORS_ONLN);
 845   loadGlobalAha();
 846   loadGlobalAhm();
 847   string numIters = folly::to<string>(
 848     std::min(1000000, int(FLAGS_numBMElements)));
 849
 850   gflags::SetCommandLineOptionWithMode(
 851     "bm_max_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
 852   );
 853   gflags::SetCommandLineOptionWithMode(
 854     "bm_min_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
 855   );
 856   string numCoresStr = folly::to<string>(numCores);
 857   gflags::SetCommandLineOptionWithMode(
 858     "numThreads", numCoresStr.c_str(), gflags::SET_FLAG_IF_DEFAULT
 859   );
 860
 861   std::cout << "\nRunning AHM benchmarks on machine with " << numCores
 862     << " logical cores.\n"
 863        "  num elements per map: " << FLAGS_numBMElements << "\n"
 864     << "  num threads for mt tests: " << FLAGS_numThreads << "\n"
 865     << "  AHM load factor: " << FLAGS_targetLoadFactor << "\n\n";
 866 }
 867
 868 int main(int argc, char** argv) {
 869   testing::InitGoogleTest(&argc, argv);
 870   gflags::ParseCommandLineFlags(&argc, &argv, true);
 871   auto ret = RUN_ALL_TESTS();
 872   if (!ret && FLAGS_benchmark) {
 873     benchmarkSetup();
 874     folly::runBenchmarks();
 875   }
 876   return ret;
 877 }
 878
 879 /*
 880 Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled
 881   (12 physical cores, 12 MB cache, 72 GB RAM)
 882
 883 Running AHM benchmarks on machine with 24 logical cores.
 884   num elements per map: 12000000
 885   num threads for mt tests: 24
 886   AHM load factor: 0.75
 887
 888 Benchmark                               Iters   Total t    t/iter iter/sec
 889 ------------------------------------------------------------------------------
 890 Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
 891 *       BM_mt_aha_find                1000000  7.767 ms  7.767 ns  122.8 M
 892  +0.81% BM_mt_ahm_find                1000000   7.83 ms   7.83 ns  121.8 M
 893 ------------------------------------------------------------------------------
 894 Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
 895 *       BM_st_aha_find                1000000  57.83 ms  57.83 ns  16.49 M
 896  +77.9% BM_st_ahm_find                1000000  102.9 ms  102.9 ns   9.27 M
 897 ------------------------------------------------------------------------------
 898 BM_mt_ahm_miss                        1000000  2.937 ms  2.937 ns  324.7 M
 899 BM_st_ahm_miss                        1000000  164.2 ms  164.2 ns  5.807 M
 900 BM_mt_ahm_find_insert_mix             1000000  8.797 ms  8.797 ns  108.4 M
 901 BM_mt_ahm_insert                      1000000  17.39 ms  17.39 ns  54.83 M
 902 BM_st_ahm_insert                      1000000  106.8 ms  106.8 ns   8.93 M
 903 BM_st_baseline_modulus_and_rando      1000000  6.223 ms  6.223 ns  153.2 M
 904 */