folly/test/AtomicHashMapTest.cpp

   1 /*
   2  * Copyright 2012 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "folly/AtomicHashMap.h"
  18
  19 #include <glog/logging.h>
  20 #include <gtest/gtest.h>
  21 #include <sys/time.h>
  22 #include <thread>
  23 #include <atomic>
  24
  25 #include "folly/Benchmark.h"
  26 #include "folly/Conv.h"
  27
  28 using std::vector;
  29 using std::string;
  30 using folly::AtomicHashMap;
  31 using folly::AtomicHashArray;
  32
  33 // Tunables:
  34 DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
  35 DEFINE_double(maxLoadFactor, 0.80, "Max before growth.");
  36 DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests.");
  37 DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks.");
  38
  39 const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor;
  40 const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
  41
  42 static int64_t nowInUsec() {
  43   timeval tv;
  44   gettimeofday(&tv, 0);
  45   return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
  46 }
  47
  48 TEST(Ahm, BasicStrings) {
  49   typedef AtomicHashMap<int64_t,string> AHM;
  50   AHM myMap(1024);
  51   EXPECT_TRUE(myMap.begin() == myMap.end());
  52
  53   for (int i = 0; i < 100; ++i) {
  54     myMap.insert(make_pair(i, folly::to<string>(i)));
  55   }
  56   for (int i = 0; i < 100; ++i) {
  57     EXPECT_EQ(myMap.find(i)->second, folly::to<string>(i));
  58   }
  59
  60   myMap.insert(std::make_pair(999, "A"));
  61   myMap.insert(std::make_pair(999, "B"));
  62   EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten
  63   myMap.find(999)->second = "B";
  64   myMap.find(999)->second = "C";
  65   EXPECT_EQ(myMap.find(999)->second, "C");
  66   EXPECT_EQ(myMap.find(999)->first, 999);
  67 }
  68
  69 typedef int32_t     KeyT;
  70 typedef int64_t     KeyTBig;
  71 typedef int32_t     ValueT;
  72
  73 typedef AtomicHashMap<KeyT,ValueT> AHMapT;
  74 typedef AHMapT::value_type RecordT;
  75 typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
  76
  77 AHArrayT::Config config;
  78 static AHArrayT::SmartPtr globalAHA(nullptr);
  79 static std::unique_ptr<AHMapT> globalAHM;
  80
  81 // Generate a deterministic value based on an input key
  82 static int genVal(int key) {
  83   return key / 3;
  84 }
  85
  86 TEST(Ahm, grow) {
  87   VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
  88     sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
  89   int numEntries = 10000;
  90   float sizeFactor = 0.46;
  91
  92   std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
  93
  94   // load map - make sure we succeed and the index is accurate
  95   bool success = true;
  96   for (uint64_t i = 0; i < numEntries; i++) {
  97     auto ret = m->insert(RecordT(i, genVal(i)));
  98     success &= ret.second;
  99     success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
 100   }
 101   // Overwrite vals to make sure there are no dups
 102   // Every insert should fail because the keys are already in the map.
 103   success = true;
 104   for (uint64_t i = 0; i < numEntries; i++) {
 105     auto ret = m->insert(RecordT(i, genVal(i * 2)));
 106     success &= (ret.second == false);  // fail on collision
 107     success &= (ret.first->second == genVal(i)); // return the previous value
 108     success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
 109   }
 110   EXPECT_TRUE(success);
 111
 112   // check correctness
 113   size_t cap = m->capacity();
 114   ValueT val;
 115   EXPECT_GT(m->numSubMaps(), 1);  // make sure we grew
 116   success = true;
 117   EXPECT_EQ(m->size(), numEntries);
 118   for (int i = 0; i < numEntries; i++) {
 119     success &= (m->find(i)->second == genVal(i));
 120   }
 121   EXPECT_TRUE(success);
 122
 123   // Check findAt
 124   success = true;
 125   KeyT key(0);
 126   AHMapT::const_iterator retIt;
 127   for (uint64_t i = 0; i < numEntries; i++) {
 128     retIt = m->find(i);
 129     retIt = m->findAt(retIt.getIndex());
 130     success &= (retIt->second == genVal(i));
 131     success &= (retIt->first == i);
 132   }
 133   EXPECT_TRUE(success);
 134
 135   // Try modifying value
 136   m->find(8)->second = 5309;
 137   EXPECT_EQ(m->find(8)->second, 5309);
 138
 139   // check clear()
 140   m->clear();
 141   success = true;
 142   for (uint64_t i = 0; i < numEntries / 2; i++) {
 143     success &= m->insert(RecordT(i, genVal(i))).second;
 144   }
 145   EXPECT_TRUE(success);
 146   EXPECT_EQ(m->size(), numEntries / 2);
 147 }
 148
 149 TEST(Ahm, iterator) {
 150   int numEntries = 10000;
 151   float sizeFactor = .46;
 152   std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
 153
 154   // load map - make sure we succeed and the index is accurate
 155   for (uint64_t i = 0; i < numEntries; i++) {
 156     m->insert(RecordT(i, genVal(i)));
 157   }
 158
 159   bool success = true;
 160   int count = 0;
 161   FOR_EACH(it, *m) {
 162     success &= (it->second == genVal(it->first));
 163     ++count;
 164   }
 165   EXPECT_TRUE(success);
 166   EXPECT_EQ(count, numEntries);
 167 }
 168
 169 class Counters {
 170 private:
 171   // NOTE: Unfortunately can't currently put a std::atomic<int64_t> in
 172   // the value in ahm since it doesn't support non-copyable but
 173   // move-constructible value types yet.
 174   AtomicHashMap<int64_t,int64_t> ahm;
 175
 176 public:
 177   explicit Counters(size_t numCounters) : ahm(numCounters) {}
 178
 179   void increment(int64_t obj_id) {
 180     auto ret = ahm.insert(std::make_pair(obj_id, 1));
 181     if (!ret.second) {
 182       // obj_id already exists, increment count
 183       __sync_fetch_and_add(&ret.first->second, 1);
 184     }
 185   }
 186
 187   int64_t getValue(int64_t obj_id) {
 188     auto ret = ahm.find(obj_id);
 189     return ret != ahm.end() ? ret->second : 0;
 190   }
 191
 192   // export the counters without blocking increments
 193   string toString() {
 194     string ret = "{\n";
 195     ret.reserve(ahm.size() * 32);
 196     for (const auto& e : ahm) {
 197       ret += folly::to<string>(
 198         "  [", e.first, ":", e.second, "]\n");
 199     }
 200     ret += "}\n";
 201     return ret;
 202   }
 203 };
 204
 205 // If you get an error "terminate called without an active exception", there
 206 // might be too many threads getting created - decrease numKeys and/or mult.
 207 TEST(Ahm, counter) {
 208   const int numKeys = 10;
 209   const int mult = 10;
 210   Counters c(numKeys);
 211   vector<int64_t> keys;
 212   FOR_EACH_RANGE(i, 1, numKeys) {
 213     keys.push_back(i);
 214   }
 215   vector<std::thread> threads;
 216   for (auto key : keys) {
 217     FOR_EACH_RANGE(i, 0, key * mult) {
 218       threads.push_back(std::thread([&, key] { c.increment(key); }));
 219     }
 220   }
 221   for (auto& t : threads) {
 222     t.join();
 223   }
 224   string str = c.toString();
 225   for (auto key : keys) {
 226     int val = key * mult;
 227     EXPECT_EQ(val, c.getValue(key));
 228     EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
 229   }
 230 }
 231
 232 class Integer {
 233
 234  public:
 235   explicit Integer(KeyT v = 0) : v_(v) {}
 236
 237   Integer& operator=(const Integer& a) {
 238     static bool throwException_ = false;
 239     throwException_ = !throwException_;
 240     if (throwException_) {
 241       throw 1;
 242     }
 243     v_ = a.v_;
 244     return *this;
 245   }
 246
 247   bool operator==(const Integer& a) const { return v_ == a.v_; }
 248
 249  private:
 250   KeyT v_;
 251 };
 252
 253 TEST(Ahm, map_exception_safety) {
 254   typedef AtomicHashMap<KeyT,Integer> MyMapT;
 255
 256   int numEntries = 10000;
 257   float sizeFactor = 0.46;
 258   std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
 259
 260   bool success = true;
 261   int count = 0;
 262   for (int i = 0; i < numEntries; i++) {
 263     try {
 264       m->insert(i, Integer(genVal(i)));
 265       success &= (m->find(i)->second == Integer(genVal(i)));
 266       ++count;
 267     } catch (...) {
 268       success &= !m->count(i);
 269     }
 270   }
 271   EXPECT_EQ(count, m->size());
 272   EXPECT_TRUE(success);
 273 }
 274
 275 TEST(Ahm, basicErase) {
 276   int numEntries = 3000;
 277
 278   std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
 279   // Iterate filling up the map and deleting all keys a few times
 280   // to test more than one subMap.
 281   for (int iterations = 0; iterations < 4; ++iterations) {
 282     // Testing insertion of keys
 283     bool success = true;
 284     for (uint64_t i = 0; i < numEntries; ++i) {
 285       success &= !(s->count(i));
 286       auto ret = s->insert(RecordT(i, i));
 287       success &= s->count(i);
 288       success &= ret.second;
 289     }
 290     EXPECT_TRUE(success);
 291     EXPECT_EQ(s->size(), numEntries);
 292
 293     // Delete every key in the map and verify that the key is gone and the the
 294     // size is correct.
 295     success = true;
 296     for (uint64_t i = 0; i < numEntries; ++i) {
 297       success &= s->erase(i);
 298       success &= (s->size() == numEntries - 1 - i);
 299       success &= !(s->count(i));
 300       success &= !(s->erase(i));
 301     }
 302     EXPECT_TRUE(success);
 303   }
 304   VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
 305 }
 306
 307 namespace {
 308
 309 inline KeyT randomizeKey(int key) {
 310   // We deterministically randomize the key to more accurately simulate
 311   // real-world usage, and to avoid pathalogical performance patterns (e.g.
 312   // those related to __gnu_cxx::hash<int64_t>()(1) == 1).
 313   //
 314   // Use a hash function we don't normally use for ints to avoid interactions.
 315   return folly::hash::jenkins_rev_mix32(key);
 316 }
 317
 318 int numOpsPerThread = 0;
 319
 320 void* insertThread(void* jj) {
 321   int64_t j = (int64_t) jj;
 322   for (int i = 0; i < numOpsPerThread; ++i) {
 323     KeyT key = randomizeKey(i + j * numOpsPerThread);
 324     globalAHM->insert(key, genVal(key));
 325   }
 326   return NULL;
 327 }
 328
 329 void* insertThreadArr(void* jj) {
 330   int64_t j = (int64_t) jj;
 331   for (int i = 0; i < numOpsPerThread; ++i) {
 332     KeyT key = randomizeKey(i + j * numOpsPerThread);
 333     globalAHA->insert(std::make_pair(key, genVal(key)));
 334   }
 335   return NULL;
 336 }
 337
 338 std::atomic<bool> runThreadsCreatedAllThreads;
 339 void runThreads(void *(*thread)(void*), int numThreads, void **statuses) {
 340   folly::BenchmarkSuspender susp;
 341   runThreadsCreatedAllThreads.store(false);
 342   vector<pthread_t> threadIds;
 343   for (int64_t j = 0; j < numThreads; j++) {
 344     pthread_t tid;
 345     if (pthread_create(&tid, NULL, thread, (void*) j) != 0) {
 346        LOG(ERROR) << "Could not start thread";
 347     } else {
 348       threadIds.push_back(tid);
 349     }
 350   }
 351   susp.dismiss();
 352
 353   runThreadsCreatedAllThreads.store(true);
 354   for (int i = 0; i < threadIds.size(); ++i) {
 355     pthread_join(threadIds[i], statuses == NULL ? NULL : &statuses[i]);
 356   }
 357 }
 358
 359 void runThreads(void *(*thread)(void*)) {
 360   runThreads(thread, FLAGS_numThreads, NULL);
 361 }
 362
 363 }
 364
 365 TEST(Ahm, collision_test) {
 366   const int numInserts = 1000000 / 4;
 367
 368   // Doing the same number on each thread so we collide.
 369   numOpsPerThread = numInserts;
 370
 371   float sizeFactor = 0.46;
 372   int entrySize = sizeof(KeyT) + sizeof(ValueT);
 373   VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
 374     " Byte entries replicated in " << FLAGS_numThreads <<
 375     " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor.";
 376
 377   globalAHM.reset(new AHMapT(int(numInserts * sizeFactor), config));
 378
 379   size_t sizeInit = globalAHM->capacity();
 380   VLOG(1) << "  Initial capacity: " << sizeInit;
 381
 382   double start = nowInUsec();
 383   runThreads([](void*) -> void* { // collisionInsertThread
 384     for (int i = 0; i < numOpsPerThread; ++i) {
 385       KeyT key = randomizeKey(i);
 386       globalAHM->insert(key, genVal(key));
 387     }
 388     return nullptr;
 389   });
 390   double elapsed = nowInUsec() - start;
 391
 392   size_t finalCap = globalAHM->capacity();
 393   size_t sizeAHM = globalAHM->size();
 394   VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
 395     " duplicate inserts (atomic).";
 396   VLOG(1) << "  Final capacity: " << finalCap << " in " <<
 397     globalAHM->numSubMaps() << " sub maps (" <<
 398     sizeAHM * 100 / finalCap << "% load factor, " <<
 399     (finalCap - sizeInit) * 100 / sizeInit << "% growth).";
 400
 401   // check correctness
 402   EXPECT_EQ(sizeAHM, numInserts);
 403   bool success = true;
 404   ValueT val;
 405   for (int i = 0; i < numInserts; ++i) {
 406     KeyT key = randomizeKey(i);
 407     success &= (globalAHM->find(key)->second == genVal(key));
 408   }
 409   EXPECT_TRUE(success);
 410
 411   // check colliding finds
 412   start = nowInUsec();
 413   runThreads([](void*) -> void* { // collisionFindThread
 414     KeyT key(0);
 415     for (int i = 0; i < numOpsPerThread; ++i) {
 416       globalAHM->find(key);
 417     }
 418     return nullptr;
 419   });
 420
 421   elapsed = nowInUsec() - start;
 422
 423   VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
 424     " duplicate finds (atomic).";
 425 }
 426
 427 namespace {
 428
 429 const int kInsertPerThread = 100000;
 430 int raceFinalSizeEstimate;
 431
 432 void* raceIterateThread(void* jj) {
 433   int64_t j = (int64_t) jj;
 434   int count = 0;
 435
 436   AHMapT::iterator it = globalAHM->begin();
 437   AHMapT::iterator end = globalAHM->end();
 438   for (; it != end; ++it) {
 439     ++count;
 440     if (count > raceFinalSizeEstimate) {
 441       EXPECT_FALSE("Infinite loop in iterator.");
 442       return NULL;
 443     }
 444   }
 445   return NULL;
 446 }
 447
 448 void* raceInsertRandomThread(void* jj) {
 449   int64_t j = (int64_t) jj;
 450   for (int i = 0; i < kInsertPerThread; ++i) {
 451     KeyT key = rand();
 452     globalAHM->insert(key, genVal(key));
 453   }
 454   return NULL;
 455 }
 456
 457 }
 458
 459 // Test for race conditions when inserting and iterating at the same time and
 460 // creating multiple submaps.
 461 TEST(Ahm, race_insert_iterate_thread_test) {
 462   const int kInsertThreads = 20;
 463   const int kIterateThreads = 20;
 464   raceFinalSizeEstimate = kInsertThreads * kInsertPerThread;
 465
 466   VLOG(1) << "Testing iteration and insertion with " << kInsertThreads
 467     << " threads inserting and " << kIterateThreads << " threads iterating.";
 468
 469   globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config));
 470
 471   vector<pthread_t> threadIds;
 472   for (int64_t j = 0; j < kInsertThreads + kIterateThreads; j++) {
 473     pthread_t tid;
 474     void *(*thread)(void*) =
 475       (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
 476     if (pthread_create(&tid, NULL, thread, (void*) j) != 0) {
 477       LOG(ERROR) << "Could not start thread";
 478     } else {
 479       threadIds.push_back(tid);
 480     }
 481   }
 482   for (int i = 0; i < threadIds.size(); ++i) {
 483     pthread_join(threadIds[i], NULL);
 484   }
 485   VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
 486   VLOG(1) << "Final size of map " << globalAHM->size();
 487 }
 488
 489 namespace {
 490
 491 const int kTestEraseInsertions = 200000;
 492 std::atomic<int32_t> insertedLevel;
 493
 494 void* testEraseInsertThread(void*) {
 495   for (int i = 0; i < kTestEraseInsertions; ++i) {
 496     KeyT key = randomizeKey(i);
 497     globalAHM->insert(key, genVal(key));
 498     insertedLevel.store(i, std::memory_order_release);
 499   }
 500   insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
 501   return NULL;
 502 }
 503
 504 void* testEraseEraseThread(void*) {
 505   for (int i = 0; i < kTestEraseInsertions; ++i) {
 506     /*
 507      * Make sure that we don't get ahead of the insert thread, because
 508      * part of the condition for this unit test succeeding is that the
 509      * map ends up empty.
 510      *
 511      * Note, there is a subtle case here when a new submap is
 512      * allocated: the erasing thread might get 0 from count(key)
 513      * because it hasn't seen numSubMaps_ update yet.  To avoid this
 514      * race causing problems for the test (it's ok for real usage), we
 515      * lag behind the inserter by more than just element.
 516      */
 517     const int lag = 10;
 518     int currentLevel;
 519     do {
 520       currentLevel = insertedLevel.load(std::memory_order_acquire);
 521       if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1;
 522     } while (currentLevel - lag < i);
 523
 524     KeyT key = randomizeKey(i);
 525     while (globalAHM->count(key)) {
 526       if (globalAHM->erase(key)) {
 527         break;
 528       }
 529     }
 530   }
 531   return NULL;
 532 }
 533
 534 }
 535
 536 // Here we have a single thread inserting some values, and several threads
 537 // racing to delete the values in the order they were inserted.
 538 TEST(Ahm, thread_erase_insert_race) {
 539   const int kInsertThreads = 1;
 540   const int kEraseThreads = 10;
 541
 542   VLOG(1) << "Testing insertion and erase with " << kInsertThreads
 543     << " thread inserting and " << kEraseThreads << " threads erasing.";
 544
 545   globalAHM.reset(new AHMapT(kTestEraseInsertions / 4, config));
 546
 547   vector<pthread_t> threadIds;
 548   for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) {
 549     pthread_t tid;
 550     void *(*thread)(void*) =
 551       (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
 552     if (pthread_create(&tid, NULL, thread, (void*) j) != 0) {
 553       LOG(ERROR) << "Could not start thread";
 554     } else {
 555       threadIds.push_back(tid);
 556     }
 557   }
 558   for (int i = 0; i < threadIds.size(); i++) {
 559     pthread_join(threadIds[i], NULL);
 560   }
 561
 562   EXPECT_TRUE(globalAHM->empty());
 563   EXPECT_EQ(globalAHM->size(), 0);
 564
 565   VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
 566 }
 567
 568 // Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value.
 569 typedef AtomicHashArray<int32_t, int32_t> AHA;
 570 AHA::Config configRace;
 571 auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
 572 void* atomicHashArrayInsertRaceThread(void* j) {
 573   AHA* arr = atomicHashArrayInsertRaceArray.get();
 574   uintptr_t numInserted = 0;
 575   while (!runThreadsCreatedAllThreads.load());
 576   for (int i = 0; i < 2; i++) {
 577     if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
 578       numInserted++;
 579     }
 580   }
 581   pthread_exit((void *) numInserted);
 582 }
 583 TEST(Ahm, atomic_hash_array_insert_race) {
 584   AHA* arr = atomicHashArrayInsertRaceArray.get();
 585   int numIterations = 50000, FLAGS_numThreads = 4;
 586   void* statuses[FLAGS_numThreads];
 587   for (int i = 0; i < numIterations; i++) {
 588     arr->clear();
 589     runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses);
 590     EXPECT_GE(arr->size(), 1);
 591     for (int j = 0; j < FLAGS_numThreads; j++) {
 592       EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
 593     }
 594   }
 595 }
 596
 597 namespace {
 598
 599 void loadGlobalAha() {
 600   std::cout << "loading global AHA with " << FLAGS_numThreads
 601             << " threads...\n";
 602   uint64_t start = nowInUsec();
 603   globalAHA = AHArrayT::create(maxBMElements, config);
 604   numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
 605   CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) <<
 606     "kNumThreads must evenly divide kNumInserts.";
 607   runThreads(insertThreadArr);
 608   uint64_t elapsed = nowInUsec() - start;
 609   std::cout << "  took " << elapsed / 1000 << " ms (" <<
 610     (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
 611   EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements);
 612 }
 613
 614 void loadGlobalAhm() {
 615   std::cout << "loading global AHM with " << FLAGS_numThreads
 616             << " threads...\n";
 617   uint64_t start = nowInUsec();
 618   globalAHM.reset(new AHMapT(maxBMElements, config));
 619   numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
 620   runThreads(insertThread);
 621   uint64_t elapsed = nowInUsec() - start;
 622   std::cout << "  took " << elapsed / 1000 << " ms (" <<
 623     (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
 624   EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
 625 }
 626
 627 }
 628
 629 BENCHMARK(st_aha_find, iters) {
 630   CHECK_LE(iters, FLAGS_numBMElements);
 631   for (int i = 0; i < iters; i++) {
 632     KeyT key = randomizeKey(i);
 633     folly::doNotOptimizeAway(globalAHA->find(key)->second);
 634   }
 635 }
 636
 637 BENCHMARK(st_ahm_find, iters) {
 638   CHECK_LE(iters, FLAGS_numBMElements);
 639   for (int i = 0; i < iters; i++) {
 640     KeyT key = randomizeKey(i);
 641     folly::doNotOptimizeAway(globalAHM->find(key)->second);
 642   }
 643 }
 644
 645 BENCHMARK_DRAW_LINE()
 646
 647 BENCHMARK(mt_ahm_miss, iters) {
 648   CHECK_LE(iters, FLAGS_numBMElements);
 649   numOpsPerThread = iters / FLAGS_numThreads;
 650   runThreads([](void* jj) -> void* {
 651     int64_t j = (int64_t) jj;
 652     while (!runThreadsCreatedAllThreads.load());
 653     for (int i = 0; i < numOpsPerThread; ++i) {
 654       KeyT key = i + j * numOpsPerThread * 100;
 655       folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
 656     }
 657     return nullptr;
 658   });
 659 }
 660
 661 BENCHMARK(st_ahm_miss, iters) {
 662   CHECK_LE(iters, FLAGS_numBMElements);
 663   for (int i = 0; i < iters; i++) {
 664     KeyT key = randomizeKey(i + iters * 100);
 665     folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
 666   }
 667 }
 668
 669 BENCHMARK(mt_ahm_find_insert_mix, iters) {
 670   CHECK_LE(iters, FLAGS_numBMElements);
 671   numOpsPerThread = iters / FLAGS_numThreads;
 672   runThreads([](void* jj) -> void* {
 673     int64_t j = (int64_t) jj;
 674     while (!runThreadsCreatedAllThreads.load());
 675     for (int i = 0; i < numOpsPerThread; ++i) {
 676       if (i % 128) {  // ~1% insert mix
 677         KeyT key = randomizeKey(i + j * numOpsPerThread);
 678         folly::doNotOptimizeAway(globalAHM->find(key)->second);
 679       } else {
 680         KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
 681         globalAHM->insert(key, genVal(key));
 682       }
 683     }
 684     return nullptr;
 685   });
 686 }
 687
 688 BENCHMARK(mt_aha_find, iters) {
 689   CHECK_LE(iters, FLAGS_numBMElements);
 690   numOpsPerThread = iters / FLAGS_numThreads;
 691   runThreads([](void* jj) -> void* {
 692       int64_t j = (int64_t) jj;
 693       while (!runThreadsCreatedAllThreads.load());
 694       for (int i = 0; i < numOpsPerThread; ++i) {
 695         KeyT key = randomizeKey(i + j * numOpsPerThread);
 696         folly::doNotOptimizeAway(globalAHA->find(key)->second);
 697       }
 698       return nullptr;
 699     });
 700 }
 701
 702 BENCHMARK(mt_ahm_find, iters) {
 703   CHECK_LE(iters, FLAGS_numBMElements);
 704   numOpsPerThread = iters / FLAGS_numThreads;
 705   runThreads([](void* jj) -> void* {
 706     int64_t j = (int64_t) jj;
 707     while (!runThreadsCreatedAllThreads.load());
 708     for (int i = 0; i < numOpsPerThread; ++i) {
 709       KeyT key = randomizeKey(i + j * numOpsPerThread);
 710       folly::doNotOptimizeAway(globalAHM->find(key)->second);
 711     }
 712     return nullptr;
 713   });
 714 }
 715
 716 KeyT k;
 717 BENCHMARK(st_baseline_modulus_and_random, iters) {
 718   for (int i = 0; i < iters; ++i) {
 719     k = randomizeKey(i) % iters;
 720   }
 721 }
 722
 723 // insertions go last because they reset the map
 724
 725 BENCHMARK(mt_ahm_insert, iters) {
 726   BENCHMARK_SUSPEND {
 727     globalAHM.reset(new AHMapT(int(iters * LF), config));
 728     numOpsPerThread = iters / FLAGS_numThreads;
 729   }
 730   runThreads(insertThread);
 731 }
 732
 733 BENCHMARK(st_ahm_insert, iters) {
 734   folly::BenchmarkSuspender susp;
 735   std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
 736   susp.dismiss();
 737
 738   for (int i = 0; i < iters; i++) {
 739     KeyT key = randomizeKey(i);
 740     ahm->insert(key, genVal(key));
 741   }
 742 }
 743
 744 void benchmarkSetup() {
 745   config.maxLoadFactor = FLAGS_maxLoadFactor;
 746   configRace.maxLoadFactor = 0.5;
 747   int numCores = sysconf(_SC_NPROCESSORS_ONLN);
 748   loadGlobalAha();
 749   loadGlobalAhm();
 750   string numIters = folly::to<string>(
 751     std::min(1000000, int(FLAGS_numBMElements)));
 752
 753   google::SetCommandLineOptionWithMode(
 754     "bm_max_iters", numIters.c_str(), google::SET_FLAG_IF_DEFAULT
 755   );
 756   google::SetCommandLineOptionWithMode(
 757     "bm_min_iters", numIters.c_str(), google::SET_FLAG_IF_DEFAULT
 758   );
 759   string numCoresStr = folly::to<string>(numCores);
 760   google::SetCommandLineOptionWithMode(
 761     "numThreads", numCoresStr.c_str(), google::SET_FLAG_IF_DEFAULT
 762   );
 763
 764   std::cout << "\nRunning AHM benchmarks on machine with " << numCores
 765     << " logical cores.\n"
 766        "  num elements per map: " << FLAGS_numBMElements << "\n"
 767     << "  num threads for mt tests: " << FLAGS_numThreads << "\n"
 768     << "  AHM load factor: " << FLAGS_targetLoadFactor << "\n\n";
 769 }
 770
 771 int main(int argc, char** argv) {
 772   testing::InitGoogleTest(&argc, argv);
 773   google::ParseCommandLineFlags(&argc, &argv, true);
 774   auto ret = RUN_ALL_TESTS();
 775   if (!ret && FLAGS_benchmark) {
 776     benchmarkSetup();
 777     folly::runBenchmarks();
 778   }
 779   return ret;
 780 }
 781
 782 /*
 783 Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled
 784   (12 physical cores, 12 MB cache, 72 GB RAM)
 785
 786 Running AHM benchmarks on machine with 24 logical cores.
 787   num elements per map: 12000000
 788   num threads for mt tests: 24
 789   AHM load factor: 0.75
 790
 791 Benchmark                               Iters   Total t    t/iter iter/sec
 792 ------------------------------------------------------------------------------
 793 Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
 794 *       BM_mt_aha_find                1000000  7.767 ms  7.767 ns  122.8 M
 795  +0.81% BM_mt_ahm_find                1000000   7.83 ms   7.83 ns  121.8 M
 796 ------------------------------------------------------------------------------
 797 Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
 798 *       BM_st_aha_find                1000000  57.83 ms  57.83 ns  16.49 M
 799  +77.9% BM_st_ahm_find                1000000  102.9 ms  102.9 ns   9.27 M
 800 ------------------------------------------------------------------------------
 801 BM_mt_ahm_miss                        1000000  2.937 ms  2.937 ns  324.7 M
 802 BM_st_ahm_miss                        1000000  164.2 ms  164.2 ns  5.807 M
 803 BM_mt_ahm_find_insert_mix             1000000  8.797 ms  8.797 ns  108.4 M
 804 BM_mt_ahm_insert                      1000000  17.39 ms  17.39 ns  54.83 M
 805 BM_st_ahm_insert                      1000000  106.8 ms  106.8 ns   8.93 M
 806 BM_st_baseline_modulus_and_rando      1000000  6.223 ms  6.223 ns  153.2 M
 807 */