folly/test/AtomicHashMapTest.cpp

   1 /*
   2  * Copyright 2014 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <folly/AtomicHashMap.h>
  18
  19 #include <glog/logging.h>
  20 #include <gtest/gtest.h>
  21 #include <sys/time.h>
  22 #include <thread>
  23 #include <atomic>
  24 #include <memory>
  25 #include <folly/Benchmark.h>
  26 #include <folly/Conv.h>
  27
  28 using std::vector;
  29 using std::string;
  30 using folly::AtomicHashMap;
  31 using folly::AtomicHashArray;
  32
// Tunables (gflags, so each can be overridden on the command line):
DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
DEFINE_double(maxLoadFactor, 0.80, "Max before growth.");
DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests.");
DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks.");

// LF is the ratio of the max load factor to the target load factor; it is
// used to scale requested map sizes.
// NOTE(review): both constants below live at namespace scope, so they are
// computed before main() calls ParseCommandLineFlags().  They therefore
// appear to reflect the flags' pre-parse values rather than command-line
// overrides -- confirm whether that is intended.
const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor;
const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
  41
  42 static int64_t nowInUsec() {
  43   timeval tv;
  44   gettimeofday(&tv, 0);
  45   return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
  46 }
  47
  48 TEST(Ahm, BasicStrings) {
  49   typedef AtomicHashMap<int64_t,string> AHM;
  50   AHM myMap(1024);
  51   EXPECT_TRUE(myMap.begin() == myMap.end());
  52
  53   for (int i = 0; i < 100; ++i) {
  54     myMap.insert(make_pair(i, folly::to<string>(i)));
  55   }
  56   for (int i = 0; i < 100; ++i) {
  57     EXPECT_EQ(myMap.find(i)->second, folly::to<string>(i));
  58   }
  59
  60   myMap.insert(std::make_pair(999, "A"));
  61   myMap.insert(std::make_pair(999, "B"));
  62   EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten
  63   myMap.find(999)->second = "B";
  64   myMap.find(999)->second = "C";
  65   EXPECT_EQ(myMap.find(999)->second, "C");
  66   EXPECT_EQ(myMap.find(999)->first, 999);
  67 }
  68
  69
  70 TEST(Ahm, BasicNoncopyable) {
  71   typedef AtomicHashMap<int64_t,std::unique_ptr<int>> AHM;
  72   AHM myMap(1024);
  73   EXPECT_TRUE(myMap.begin() == myMap.end());
  74
  75   for (int i = 0; i < 50; ++i) {
  76     myMap.insert(make_pair(i, std::unique_ptr<int>(new int(i))));
  77   }
  78   for (int i = 50; i < 100; ++i) {
  79     myMap.insert(i, std::unique_ptr<int>(new int (i)));
  80   }
  81   for (int i = 0; i < 100; ++i) {
  82     EXPECT_EQ(*(myMap.find(i)->second), i);
  83   }
  84   for (int i = 0; i < 100; i+=4) {
  85     myMap.erase(i);
  86   }
  87   for (int i = 0; i < 100; i+=4) {
  88     EXPECT_EQ(myMap.find(i), myMap.end());
  89   }
  90 }
  91
  92 typedef int32_t     KeyT;
  93 typedef int32_t     ValueT;
  94
  95 typedef AtomicHashMap<KeyT,ValueT> AHMapT;
  96 typedef AHMapT::value_type RecordT;
  97 typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
  98
  99 AHArrayT::Config config;
 100 static AHArrayT::SmartPtr globalAHA(nullptr);
 101 static std::unique_ptr<AHMapT> globalAHM;
 102
 103 // Generate a deterministic value based on an input key
 104 static int genVal(int key) {
 105   return key / 3;
 106 }
 107
 108 TEST(Ahm, grow) {
 109   VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
 110     sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
 111   uint64_t numEntries = 10000;
 112   float sizeFactor = 0.46;
 113
 114   std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
 115
 116   // load map - make sure we succeed and the index is accurate
 117   bool success = true;
 118   for (uint64_t i = 0; i < numEntries; i++) {
 119     auto ret = m->insert(RecordT(i, genVal(i)));
 120     success &= ret.second;
 121     success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
 122   }
 123   // Overwrite vals to make sure there are no dups
 124   // Every insert should fail because the keys are already in the map.
 125   success = true;
 126   for (uint64_t i = 0; i < numEntries; i++) {
 127     auto ret = m->insert(RecordT(i, genVal(i * 2)));
 128     success &= (ret.second == false);  // fail on collision
 129     success &= (ret.first->second == genVal(i)); // return the previous value
 130     success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
 131   }
 132   EXPECT_TRUE(success);
 133
 134   // check correctness
 135   size_t cap = m->capacity();
 136   ValueT val;
 137   EXPECT_GT(m->numSubMaps(), 1);  // make sure we grew
 138   success = true;
 139   EXPECT_EQ(m->size(), numEntries);
 140   for (size_t i = 0; i < numEntries; i++) {
 141     success &= (m->find(i)->second == genVal(i));
 142   }
 143   EXPECT_TRUE(success);
 144
 145   // Check findAt
 146   success = true;
 147   KeyT key(0);
 148   AHMapT::const_iterator retIt;
 149   for (int32_t i = 0; i < int32_t(numEntries); i++) {
 150     retIt = m->find(i);
 151     retIt = m->findAt(retIt.getIndex());
 152     success &= (retIt->second == genVal(i));
 153     // We use a uint32_t index so that this comparison is between two
 154     // variables of the same type.
 155     success &= (retIt->first == i);
 156   }
 157   EXPECT_TRUE(success);
 158
 159   // Try modifying value
 160   m->find(8)->second = 5309;
 161   EXPECT_EQ(m->find(8)->second, 5309);
 162
 163   // check clear()
 164   m->clear();
 165   success = true;
 166   for (uint64_t i = 0; i < numEntries / 2; i++) {
 167     success &= m->insert(RecordT(i, genVal(i))).second;
 168   }
 169   EXPECT_TRUE(success);
 170   EXPECT_EQ(m->size(), numEntries / 2);
 171 }
 172
 173 TEST(Ahm, iterator) {
 174   int numEntries = 10000;
 175   float sizeFactor = .46;
 176   std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
 177
 178   // load map - make sure we succeed and the index is accurate
 179   for (int i = 0; i < numEntries; i++) {
 180     m->insert(RecordT(i, genVal(i)));
 181   }
 182
 183   bool success = true;
 184   int count = 0;
 185   FOR_EACH(it, *m) {
 186     success &= (it->second == genVal(it->first));
 187     ++count;
 188   }
 189   EXPECT_TRUE(success);
 190   EXPECT_EQ(count, numEntries);
 191 }
 192
 193 class Counters {
 194 private:
 195   // Note: Unfortunately can't currently put a std::atomic<int64_t> in
 196   // the value in ahm since it doesn't support types that are both non-copy
 197   // and non-move constructible yet.
 198   AtomicHashMap<int64_t,int64_t> ahm;
 199
 200 public:
 201   explicit Counters(size_t numCounters) : ahm(numCounters) {}
 202
 203   void increment(int64_t obj_id) {
 204     auto ret = ahm.insert(std::make_pair(obj_id, 1));
 205     if (!ret.second) {
 206       // obj_id already exists, increment count
 207       __sync_fetch_and_add(&ret.first->second, 1);
 208     }
 209   }
 210
 211   int64_t getValue(int64_t obj_id) {
 212     auto ret = ahm.find(obj_id);
 213     return ret != ahm.end() ? ret->second : 0;
 214   }
 215
 216   // export the counters without blocking increments
 217   string toString() {
 218     string ret = "{\n";
 219     ret.reserve(ahm.size() * 32);
 220     for (const auto& e : ahm) {
 221       ret += folly::to<string>(
 222         "  [", e.first, ":", e.second, "]\n");
 223     }
 224     ret += "}\n";
 225     return ret;
 226   }
 227 };
 228
 229 // If you get an error "terminate called without an active exception", there
 230 // might be too many threads getting created - decrease numKeys and/or mult.
 231 TEST(Ahm, counter) {
 232   const int numKeys = 10;
 233   const int mult = 10;
 234   Counters c(numKeys);
 235   vector<int64_t> keys;
 236   FOR_EACH_RANGE(i, 1, numKeys) {
 237     keys.push_back(i);
 238   }
 239   vector<std::thread> threads;
 240   for (auto key : keys) {
 241     FOR_EACH_RANGE(i, 0, key * mult) {
 242       threads.push_back(std::thread([&, key] { c.increment(key); }));
 243     }
 244   }
 245   for (auto& t : threads) {
 246     t.join();
 247   }
 248   string str = c.toString();
 249   for (auto key : keys) {
 250     int val = key * mult;
 251     EXPECT_EQ(val, c.getValue(key));
 252     EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
 253   }
 254 }
 255
 256 class Integer {
 257
 258  public:
 259   explicit Integer(KeyT v = 0) : v_(v) {}
 260
 261   Integer& operator=(const Integer& a) {
 262     static bool throwException_ = false;
 263     throwException_ = !throwException_;
 264     if (throwException_) {
 265       throw 1;
 266     }
 267     v_ = a.v_;
 268     return *this;
 269   }
 270
 271   bool operator==(const Integer& a) const { return v_ == a.v_; }
 272
 273  private:
 274   KeyT v_;
 275 };
 276
 277 TEST(Ahm, map_exception_safety) {
 278   typedef AtomicHashMap<KeyT,Integer> MyMapT;
 279
 280   int numEntries = 10000;
 281   float sizeFactor = 0.46;
 282   std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
 283
 284   bool success = true;
 285   int count = 0;
 286   for (int i = 0; i < numEntries; i++) {
 287     try {
 288       m->insert(i, Integer(genVal(i)));
 289       success &= (m->find(i)->second == Integer(genVal(i)));
 290       ++count;
 291     } catch (...) {
 292       success &= !m->count(i);
 293     }
 294   }
 295   EXPECT_EQ(count, m->size());
 296   EXPECT_TRUE(success);
 297 }
 298
 299 TEST(Ahm, basicErase) {
 300   size_t numEntries = 3000;
 301
 302   std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
 303   // Iterate filling up the map and deleting all keys a few times
 304   // to test more than one subMap.
 305   for (int iterations = 0; iterations < 4; ++iterations) {
 306     // Testing insertion of keys
 307     bool success = true;
 308     for (size_t i = 0; i < numEntries; ++i) {
 309       success &= !(s->count(i));
 310       auto ret = s->insert(RecordT(i, i));
 311       success &= s->count(i);
 312       success &= ret.second;
 313     }
 314     EXPECT_TRUE(success);
 315     EXPECT_EQ(s->size(), numEntries);
 316
 317     // Delete every key in the map and verify that the key is gone and the the
 318     // size is correct.
 319     success = true;
 320     for (size_t i = 0; i < numEntries; ++i) {
 321       success &= s->erase(i);
 322       success &= (s->size() == numEntries - 1 - i);
 323       success &= !(s->count(i));
 324       success &= !(s->erase(i));
 325     }
 326     EXPECT_TRUE(success);
 327   }
 328   VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
 329 }
 330
 331 namespace {
 332
 333 inline KeyT randomizeKey(int key) {
 334   // We deterministically randomize the key to more accurately simulate
 335   // real-world usage, and to avoid pathalogical performance patterns (e.g.
 336   // those related to __gnu_cxx::hash<int64_t>()(1) == 1).
 337   //
 338   // Use a hash function we don't normally use for ints to avoid interactions.
 339   return folly::hash::jenkins_rev_mix32(key);
 340 }
 341
 342 int numOpsPerThread = 0;
 343
 344 void* insertThread(void* jj) {
 345   int64_t j = (int64_t) jj;
 346   for (int i = 0; i < numOpsPerThread; ++i) {
 347     KeyT key = randomizeKey(i + j * numOpsPerThread);
 348     globalAHM->insert(key, genVal(key));
 349   }
 350   return nullptr;
 351 }
 352
 353 void* insertThreadArr(void* jj) {
 354   int64_t j = (int64_t) jj;
 355   for (int i = 0; i < numOpsPerThread; ++i) {
 356     KeyT key = randomizeKey(i + j * numOpsPerThread);
 357     globalAHA->insert(std::make_pair(key, genVal(key)));
 358   }
 359   return nullptr;
 360 }
 361
 362 std::atomic<bool> runThreadsCreatedAllThreads;
 363 void runThreads(void *(*thread)(void*), int numThreads, void **statuses) {
 364   folly::BenchmarkSuspender susp;
 365   runThreadsCreatedAllThreads.store(false);
 366   vector<pthread_t> threadIds;
 367   for (int64_t j = 0; j < numThreads; j++) {
 368     pthread_t tid;
 369     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 370        LOG(ERROR) << "Could not start thread";
 371     } else {
 372       threadIds.push_back(tid);
 373     }
 374   }
 375   susp.dismiss();
 376
 377   runThreadsCreatedAllThreads.store(true);
 378   for (size_t i = 0; i < threadIds.size(); ++i) {
 379     pthread_join(threadIds[i], statuses == nullptr ? nullptr : &statuses[i]);
 380   }
 381 }
 382
 383 void runThreads(void *(*thread)(void*)) {
 384   runThreads(thread, FLAGS_numThreads, nullptr);
 385 }
 386
 387 }
 388
 389 TEST(Ahm, collision_test) {
 390   const int numInserts = 1000000 / 4;
 391
 392   // Doing the same number on each thread so we collide.
 393   numOpsPerThread = numInserts;
 394
 395   float sizeFactor = 0.46;
 396   int entrySize = sizeof(KeyT) + sizeof(ValueT);
 397   VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
 398     " Byte entries replicated in " << FLAGS_numThreads <<
 399     " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor.";
 400
 401   globalAHM.reset(new AHMapT(int(numInserts * sizeFactor), config));
 402
 403   size_t sizeInit = globalAHM->capacity();
 404   VLOG(1) << "  Initial capacity: " << sizeInit;
 405
 406   double start = nowInUsec();
 407   runThreads([](void*) -> void* { // collisionInsertThread
 408     for (int i = 0; i < numOpsPerThread; ++i) {
 409       KeyT key = randomizeKey(i);
 410       globalAHM->insert(key, genVal(key));
 411     }
 412     return nullptr;
 413   });
 414   double elapsed = nowInUsec() - start;
 415
 416   size_t finalCap = globalAHM->capacity();
 417   size_t sizeAHM = globalAHM->size();
 418   VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
 419     " duplicate inserts (atomic).";
 420   VLOG(1) << "  Final capacity: " << finalCap << " in " <<
 421     globalAHM->numSubMaps() << " sub maps (" <<
 422     sizeAHM * 100 / finalCap << "% load factor, " <<
 423     (finalCap - sizeInit) * 100 / sizeInit << "% growth).";
 424
 425   // check correctness
 426   EXPECT_EQ(sizeAHM, numInserts);
 427   bool success = true;
 428   ValueT val;
 429   for (int i = 0; i < numInserts; ++i) {
 430     KeyT key = randomizeKey(i);
 431     success &= (globalAHM->find(key)->second == genVal(key));
 432   }
 433   EXPECT_TRUE(success);
 434
 435   // check colliding finds
 436   start = nowInUsec();
 437   runThreads([](void*) -> void* { // collisionFindThread
 438     KeyT key(0);
 439     for (int i = 0; i < numOpsPerThread; ++i) {
 440       globalAHM->find(key);
 441     }
 442     return nullptr;
 443   });
 444
 445   elapsed = nowInUsec() - start;
 446
 447   VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
 448     " duplicate finds (atomic).";
 449 }
 450
 451 namespace {
 452
 453 const int kInsertPerThread = 100000;
 454 int raceFinalSizeEstimate;
 455
 456 void* raceIterateThread(void* jj) {
 457   int64_t j = (int64_t) jj;
 458   int count = 0;
 459
 460   AHMapT::iterator it = globalAHM->begin();
 461   AHMapT::iterator end = globalAHM->end();
 462   for (; it != end; ++it) {
 463     ++count;
 464     if (count > raceFinalSizeEstimate) {
 465       EXPECT_FALSE("Infinite loop in iterator.");
 466       return nullptr;
 467     }
 468   }
 469   return nullptr;
 470 }
 471
 472 void* raceInsertRandomThread(void* jj) {
 473   int64_t j = (int64_t) jj;
 474   for (int i = 0; i < kInsertPerThread; ++i) {
 475     KeyT key = rand();
 476     globalAHM->insert(key, genVal(key));
 477   }
 478   return nullptr;
 479 }
 480
 481 }
 482
 483 // Test for race conditions when inserting and iterating at the same time and
 484 // creating multiple submaps.
 485 TEST(Ahm, race_insert_iterate_thread_test) {
 486   const int kInsertThreads = 20;
 487   const int kIterateThreads = 20;
 488   raceFinalSizeEstimate = kInsertThreads * kInsertPerThread;
 489
 490   VLOG(1) << "Testing iteration and insertion with " << kInsertThreads
 491     << " threads inserting and " << kIterateThreads << " threads iterating.";
 492
 493   globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config));
 494
 495   vector<pthread_t> threadIds;
 496   for (int64_t j = 0; j < kInsertThreads + kIterateThreads; j++) {
 497     pthread_t tid;
 498     void *(*thread)(void*) =
 499       (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
 500     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 501       LOG(ERROR) << "Could not start thread";
 502     } else {
 503       threadIds.push_back(tid);
 504     }
 505   }
 506   for (size_t i = 0; i < threadIds.size(); ++i) {
 507     pthread_join(threadIds[i], nullptr);
 508   }
 509   VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
 510   VLOG(1) << "Final size of map " << globalAHM->size();
 511 }
 512
 513 namespace {
 514
 515 const int kTestEraseInsertions = 200000;
 516 std::atomic<int32_t> insertedLevel;
 517
 518 void* testEraseInsertThread(void*) {
 519   for (int i = 0; i < kTestEraseInsertions; ++i) {
 520     KeyT key = randomizeKey(i);
 521     globalAHM->insert(key, genVal(key));
 522     insertedLevel.store(i, std::memory_order_release);
 523   }
 524   insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
 525   return nullptr;
 526 }
 527
 528 void* testEraseEraseThread(void*) {
 529   for (int i = 0; i < kTestEraseInsertions; ++i) {
 530     /*
 531      * Make sure that we don't get ahead of the insert thread, because
 532      * part of the condition for this unit test succeeding is that the
 533      * map ends up empty.
 534      *
 535      * Note, there is a subtle case here when a new submap is
 536      * allocated: the erasing thread might get 0 from count(key)
 537      * because it hasn't seen numSubMaps_ update yet.  To avoid this
 538      * race causing problems for the test (it's ok for real usage), we
 539      * lag behind the inserter by more than just element.
 540      */
 541     const int lag = 10;
 542     int currentLevel;
 543     do {
 544       currentLevel = insertedLevel.load(std::memory_order_acquire);
 545       if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1;
 546     } while (currentLevel - lag < i);
 547
 548     KeyT key = randomizeKey(i);
 549     while (globalAHM->count(key)) {
 550       if (globalAHM->erase(key)) {
 551         break;
 552       }
 553     }
 554   }
 555   return nullptr;
 556 }
 557
 558 }
 559
 560 // Here we have a single thread inserting some values, and several threads
 561 // racing to delete the values in the order they were inserted.
 562 TEST(Ahm, thread_erase_insert_race) {
 563   const int kInsertThreads = 1;
 564   const int kEraseThreads = 10;
 565
 566   VLOG(1) << "Testing insertion and erase with " << kInsertThreads
 567     << " thread inserting and " << kEraseThreads << " threads erasing.";
 568
 569   globalAHM.reset(new AHMapT(kTestEraseInsertions / 4, config));
 570
 571   vector<pthread_t> threadIds;
 572   for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) {
 573     pthread_t tid;
 574     void *(*thread)(void*) =
 575       (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
 576     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 577       LOG(ERROR) << "Could not start thread";
 578     } else {
 579       threadIds.push_back(tid);
 580     }
 581   }
 582   for (size_t i = 0; i < threadIds.size(); i++) {
 583     pthread_join(threadIds[i], nullptr);
 584   }
 585
 586   EXPECT_TRUE(globalAHM->empty());
 587   EXPECT_EQ(globalAHM->size(), 0);
 588
 589   VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
 590 }
 591
 592 // Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value.
 593 typedef AtomicHashArray<int32_t, int32_t> AHA;
 594 AHA::Config configRace;
 595 auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
 596 void* atomicHashArrayInsertRaceThread(void* j) {
 597   AHA* arr = atomicHashArrayInsertRaceArray.get();
 598   uintptr_t numInserted = 0;
 599   while (!runThreadsCreatedAllThreads.load());
 600   for (int i = 0; i < 2; i++) {
 601     if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
 602       numInserted++;
 603     }
 604   }
 605   pthread_exit((void *) numInserted);
 606 }
 607 TEST(Ahm, atomic_hash_array_insert_race) {
 608   AHA* arr = atomicHashArrayInsertRaceArray.get();
 609   int numIterations = 50000, FLAGS_numThreads = 4;
 610   void* statuses[FLAGS_numThreads];
 611   for (int i = 0; i < numIterations; i++) {
 612     arr->clear();
 613     runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses);
 614     EXPECT_GE(arr->size(), 1);
 615     for (int j = 0; j < FLAGS_numThreads; j++) {
 616       EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
 617     }
 618   }
 619 }
 620
 621 namespace {
 622
 623 void loadGlobalAha() {
 624   std::cout << "loading global AHA with " << FLAGS_numThreads
 625             << " threads...\n";
 626   uint64_t start = nowInUsec();
 627   globalAHA = AHArrayT::create(maxBMElements, config);
 628   numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
 629   CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) <<
 630     "kNumThreads must evenly divide kNumInserts.";
 631   runThreads(insertThreadArr);
 632   uint64_t elapsed = nowInUsec() - start;
 633   std::cout << "  took " << elapsed / 1000 << " ms (" <<
 634     (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
 635   EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements);
 636 }
 637
 638 void loadGlobalAhm() {
 639   std::cout << "loading global AHM with " << FLAGS_numThreads
 640             << " threads...\n";
 641   uint64_t start = nowInUsec();
 642   globalAHM.reset(new AHMapT(maxBMElements, config));
 643   numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
 644   runThreads(insertThread);
 645   uint64_t elapsed = nowInUsec() - start;
 646   std::cout << "  took " << elapsed / 1000 << " ms (" <<
 647     (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
 648   EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
 649 }
 650
 651 }
 652
 653 BENCHMARK(st_aha_find, iters) {
 654   CHECK_LE(iters, FLAGS_numBMElements);
 655   for (size_t i = 0; i < iters; i++) {
 656     KeyT key = randomizeKey(i);
 657     folly::doNotOptimizeAway(globalAHA->find(key)->second);
 658   }
 659 }
 660
 661 BENCHMARK(st_ahm_find, iters) {
 662   CHECK_LE(iters, FLAGS_numBMElements);
 663   for (size_t i = 0; i < iters; i++) {
 664     KeyT key = randomizeKey(i);
 665     folly::doNotOptimizeAway(globalAHM->find(key)->second);
 666   }
 667 }
 668
 669 BENCHMARK_DRAW_LINE()
 670
 671 BENCHMARK(mt_ahm_miss, iters) {
 672   CHECK_LE(iters, FLAGS_numBMElements);
 673   numOpsPerThread = iters / FLAGS_numThreads;
 674   runThreads([](void* jj) -> void* {
 675     int64_t j = (int64_t) jj;
 676     while (!runThreadsCreatedAllThreads.load());
 677     for (int i = 0; i < numOpsPerThread; ++i) {
 678       KeyT key = i + j * numOpsPerThread * 100;
 679       folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
 680     }
 681     return nullptr;
 682   });
 683 }
 684
 685 BENCHMARK(st_ahm_miss, iters) {
 686   CHECK_LE(iters, FLAGS_numBMElements);
 687   for (size_t i = 0; i < iters; i++) {
 688     KeyT key = randomizeKey(i + iters * 100);
 689     folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
 690   }
 691 }
 692
 693 BENCHMARK(mt_ahm_find_insert_mix, iters) {
 694   CHECK_LE(iters, FLAGS_numBMElements);
 695   numOpsPerThread = iters / FLAGS_numThreads;
 696   runThreads([](void* jj) -> void* {
 697     int64_t j = (int64_t) jj;
 698     while (!runThreadsCreatedAllThreads.load());
 699     for (int i = 0; i < numOpsPerThread; ++i) {
 700       if (i % 128) {  // ~1% insert mix
 701         KeyT key = randomizeKey(i + j * numOpsPerThread);
 702         folly::doNotOptimizeAway(globalAHM->find(key)->second);
 703       } else {
 704         KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
 705         globalAHM->insert(key, genVal(key));
 706       }
 707     }
 708     return nullptr;
 709   });
 710 }
 711
 712 BENCHMARK(mt_aha_find, iters) {
 713   CHECK_LE(iters, FLAGS_numBMElements);
 714   numOpsPerThread = iters / FLAGS_numThreads;
 715   runThreads([](void* jj) -> void* {
 716       int64_t j = (int64_t) jj;
 717       while (!runThreadsCreatedAllThreads.load());
 718       for (int i = 0; i < numOpsPerThread; ++i) {
 719         KeyT key = randomizeKey(i + j * numOpsPerThread);
 720         folly::doNotOptimizeAway(globalAHA->find(key)->second);
 721       }
 722       return nullptr;
 723     });
 724 }
 725
 726 BENCHMARK(mt_ahm_find, iters) {
 727   CHECK_LE(iters, FLAGS_numBMElements);
 728   numOpsPerThread = iters / FLAGS_numThreads;
 729   runThreads([](void* jj) -> void* {
 730     int64_t j = (int64_t) jj;
 731     while (!runThreadsCreatedAllThreads.load());
 732     for (int i = 0; i < numOpsPerThread; ++i) {
 733       KeyT key = randomizeKey(i + j * numOpsPerThread);
 734       folly::doNotOptimizeAway(globalAHM->find(key)->second);
 735     }
 736     return nullptr;
 737   });
 738 }
 739
 740 KeyT k;
 741 BENCHMARK(st_baseline_modulus_and_random, iters) {
 742   for (size_t i = 0; i < iters; ++i) {
 743     k = randomizeKey(i) % iters;
 744   }
 745 }
 746
 747 // insertions go last because they reset the map
 748
 749 BENCHMARK(mt_ahm_insert, iters) {
 750   BENCHMARK_SUSPEND {
 751     globalAHM.reset(new AHMapT(int(iters * LF), config));
 752     numOpsPerThread = iters / FLAGS_numThreads;
 753   }
 754   runThreads(insertThread);
 755 }
 756
 757 BENCHMARK(st_ahm_insert, iters) {
 758   folly::BenchmarkSuspender susp;
 759   std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
 760   susp.dismiss();
 761
 762   for (size_t i = 0; i < iters; i++) {
 763     KeyT key = randomizeKey(i);
 764     ahm->insert(key, genVal(key));
 765   }
 766 }
 767
 768 void benchmarkSetup() {
 769   config.maxLoadFactor = FLAGS_maxLoadFactor;
 770   configRace.maxLoadFactor = 0.5;
 771   int numCores = sysconf(_SC_NPROCESSORS_ONLN);
 772   loadGlobalAha();
 773   loadGlobalAhm();
 774   string numIters = folly::to<string>(
 775     std::min(1000000, int(FLAGS_numBMElements)));
 776
 777   gflags::SetCommandLineOptionWithMode(
 778     "bm_max_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
 779   );
 780   gflags::SetCommandLineOptionWithMode(
 781     "bm_min_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
 782   );
 783   string numCoresStr = folly::to<string>(numCores);
 784   gflags::SetCommandLineOptionWithMode(
 785     "numThreads", numCoresStr.c_str(), gflags::SET_FLAG_IF_DEFAULT
 786   );
 787
 788   std::cout << "\nRunning AHM benchmarks on machine with " << numCores
 789     << " logical cores.\n"
 790        "  num elements per map: " << FLAGS_numBMElements << "\n"
 791     << "  num threads for mt tests: " << FLAGS_numThreads << "\n"
 792     << "  AHM load factor: " << FLAGS_targetLoadFactor << "\n\n";
 793 }
 794
 795 int main(int argc, char** argv) {
 796   testing::InitGoogleTest(&argc, argv);
 797   gflags::ParseCommandLineFlags(&argc, &argv, true);
 798   auto ret = RUN_ALL_TESTS();
 799   if (!ret && FLAGS_benchmark) {
 800     benchmarkSetup();
 801     folly::runBenchmarks();
 802   }
 803   return ret;
 804 }
 805
 806 /*
 807 Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled
 808   (12 physical cores, 12 MB cache, 72 GB RAM)
 809
 810 Running AHM benchmarks on machine with 24 logical cores.
 811   num elements per map: 12000000
 812   num threads for mt tests: 24
 813   AHM load factor: 0.75
 814
 815 Benchmark                               Iters   Total t    t/iter iter/sec
 816 ------------------------------------------------------------------------------
 817 Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
 818 *       BM_mt_aha_find                1000000  7.767 ms  7.767 ns  122.8 M
 819  +0.81% BM_mt_ahm_find                1000000   7.83 ms   7.83 ns  121.8 M
 820 ------------------------------------------------------------------------------
 821 Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
 822 *       BM_st_aha_find                1000000  57.83 ms  57.83 ns  16.49 M
 823  +77.9% BM_st_ahm_find                1000000  102.9 ms  102.9 ns   9.27 M
 824 ------------------------------------------------------------------------------
 825 BM_mt_ahm_miss                        1000000  2.937 ms  2.937 ns  324.7 M
 826 BM_st_ahm_miss                        1000000  164.2 ms  164.2 ns  5.807 M
 827 BM_mt_ahm_find_insert_mix             1000000  8.797 ms  8.797 ns  108.4 M
 828 BM_mt_ahm_insert                      1000000  17.39 ms  17.39 ns  54.83 M
 829 BM_st_ahm_insert                      1000000  106.8 ms  106.8 ns   8.93 M
 830 BM_st_baseline_modulus_and_rando      1000000  6.223 ms  6.223 ns  153.2 M
 831 */