// Adds writer test case for RCU
// [folly.git] / folly / test / SharedMutexTest.cpp
1 /*
2  * Copyright 2015-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
#include <folly/SharedMutex.h>

#include <stdlib.h>

#include <atomic>
#include <thread>
#include <vector>

#include <boost/optional.hpp>
#include <boost/thread/shared_mutex.hpp>

#include <folly/Benchmark.h>
#include <folly/MPMCQueue.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/synchronization/RWSpinLock.h>
#include <folly/test/DeterministicSchedule.h>
31
32 using namespace folly;
33 using namespace folly::test;
34 using namespace std;
35 using namespace std::chrono;
36
37 typedef DeterministicSchedule DSched;
38 typedef SharedMutexImpl<true, void, DeterministicAtomic, true>
39     DSharedMutexReadPriority;
40 typedef SharedMutexImpl<false, void, DeterministicAtomic, true>
41     DSharedMutexWritePriority;
42
// Exercises the basic single-threaded lock/unlock API of a SharedMutex
// flavor: exclusive vs shared exclusion via try_lock/try_lock_shared,
// multiple simultaneous shared holders, and the atomic
// unlock_and_lock_shared downgrade.
template <typename Lock>
void runBasicTest() {
  Lock lock;
  SharedMutexToken token1;
  SharedMutexToken token2;
  SharedMutexToken token3;

  // an exclusive holder blocks both further exclusive and shared attempts
  EXPECT_TRUE(lock.try_lock());
  EXPECT_FALSE(lock.try_lock());
  EXPECT_FALSE(lock.try_lock_shared(token1));
  lock.unlock();

  // shared holders block exclusive attempts but admit more readers
  EXPECT_TRUE(lock.try_lock_shared(token1));
  EXPECT_FALSE(lock.try_lock());
  EXPECT_TRUE(lock.try_lock_shared(token2));
  lock.lock_shared(token3);
  lock.unlock_shared(token3);
  lock.unlock_shared(token2);
  lock.unlock_shared(token1);

  lock.lock();
  lock.unlock();

  lock.lock_shared(token1);
  lock.lock_shared(token2);
  lock.unlock_shared(token1);
  lock.unlock_shared(token2);

  // downgrade exclusive -> shared without releasing the lock; another
  // reader can join while the downgraded shared lock is held
  lock.lock();
  lock.unlock_and_lock_shared(token1);
  lock.lock_shared(token2);
  lock.unlock_shared(token2);
  lock.unlock_shared(token1);
}

// Run the single-threaded API checks against both priority policies.
TEST(SharedMutex, basic) {
  runBasicTest<SharedMutexReadPriority>();
  runBasicTest<SharedMutexWritePriority>();
}
82
// Exercises the RAII holder types (WriteHolder, ReadHolder, UpgradeHolder):
// move construction and move assignment between holders of the same kind,
// and lock conversions (exclusive -> upgrade -> exclusive -> shared)
// expressed as holder-to-holder moves.
template <typename Lock>
void runBasicHoldersTest() {
  Lock lock;
  SharedMutexToken token;

  {
    // create an exclusive write lock via holder
    typename Lock::WriteHolder holder(lock);
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // move ownership to another write holder via move constructor
    typename Lock::WriteHolder holder2(std::move(holder));
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // move ownership to another write holder via assign operator
    typename Lock::WriteHolder holder3(nullptr);
    holder3 = std::move(holder2);
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // downgrade from exclusive to upgrade lock via move constructor
    typename Lock::UpgradeHolder holder4(std::move(holder3));

    // ensure we can lock from a shared source
    EXPECT_FALSE(lock.try_lock());
    EXPECT_TRUE(lock.try_lock_shared(token));
    lock.unlock_shared(token);

    // promote from upgrade to exclusive lock via move constructor
    typename Lock::WriteHolder holder5(std::move(holder4));
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // downgrade exclusive to shared lock via move constructor
    typename Lock::ReadHolder holder6(std::move(holder5));

    // ensure we can lock from another shared source
    EXPECT_FALSE(lock.try_lock());
    EXPECT_TRUE(lock.try_lock_shared(token));
    lock.unlock_shared(token);
  }

  {
    typename Lock::WriteHolder holder(lock);
    EXPECT_FALSE(lock.try_lock());
  }

  {
    // several read holders may coexist with a single upgrade holder
    typename Lock::ReadHolder holder(lock);
    typename Lock::ReadHolder holder2(lock);
    typename Lock::UpgradeHolder holder3(lock);
  }

  {
    // an upgrade holder can be downgraded to a read holder while another
    // reader is active
    typename Lock::UpgradeHolder holder(lock);
    typename Lock::ReadHolder holder2(lock);
    typename Lock::ReadHolder holder3(std::move(holder));
  }
}

// Run the holder checks against both priority policies.
TEST(SharedMutex, basic_holders) {
  runBasicHoldersTest<SharedMutexReadPriority>();
  runBasicHoldersTest<SharedMutexWritePriority>();
}
149
150 template <typename Lock>
151 void runManyReadLocksTestWithTokens() {
152   Lock lock;
153
154   vector<SharedMutexToken> tokens;
155   for (int i = 0; i < 1000; ++i) {
156     tokens.emplace_back();
157     EXPECT_TRUE(lock.try_lock_shared(tokens.back()));
158   }
159   for (auto& token : tokens) {
160     lock.unlock_shared(token);
161   }
162   EXPECT_TRUE(lock.try_lock());
163   lock.unlock();
164 }
165
166 TEST(SharedMutex, many_read_locks_with_tokens) {
167   runManyReadLocksTestWithTokens<SharedMutexReadPriority>();
168   runManyReadLocksTestWithTokens<SharedMutexWritePriority>();
169 }
170
171 template <typename Lock>
172 void runManyReadLocksTestWithoutTokens() {
173   Lock lock;
174
175   for (int i = 0; i < 1000; ++i) {
176     EXPECT_TRUE(lock.try_lock_shared());
177   }
178   for (int i = 0; i < 1000; ++i) {
179     lock.unlock_shared();
180   }
181   EXPECT_TRUE(lock.try_lock());
182   lock.unlock();
183 }
184
185 TEST(SharedMutex, many_read_locks_without_tokens) {
186   runManyReadLocksTestWithoutTokens<SharedMutexReadPriority>();
187   runManyReadLocksTestWithoutTokens<SharedMutexWritePriority>();
188 }
189
// Verifies that timed lock attempts with zero, negative, or already
// elapsed deadlines still succeed immediately when the lock is free —
// for both relative (try_lock_for) and absolute (try_lock_until, on both
// system_clock and steady_clock) flavors, exclusive and shared.
template <typename Lock>
void runTimeoutInPastTest() {
  Lock lock;

  EXPECT_TRUE(lock.try_lock_for(milliseconds(0)));
  lock.unlock();
  EXPECT_TRUE(lock.try_lock_for(milliseconds(-1)));
  lock.unlock();
  EXPECT_TRUE(lock.try_lock_shared_for(milliseconds(0)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_shared_for(milliseconds(-1)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_until(system_clock::now() - milliseconds(1)));
  lock.unlock();
  EXPECT_TRUE(
      lock.try_lock_shared_until(system_clock::now() - milliseconds(1)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_until(steady_clock::now() - milliseconds(1)));
  lock.unlock();
  EXPECT_TRUE(
      lock.try_lock_shared_until(steady_clock::now() - milliseconds(1)));
  lock.unlock_shared();
}

// Run the past-deadline checks against both priority policies.
TEST(SharedMutex, timeout_in_past) {
  runTimeoutInPastTest<SharedMutexReadPriority>();
  runTimeoutInPastTest<SharedMutexWritePriority>();
}
218
// Returns true once a single invocation of func is measured (on the
// steady clock) to take within +/-25% of expectedDuration, retrying up
// to 100 times so that occasional scheduling noise does not cause a
// spurious failure; returns false if no attempt ever lands in the window.
template <class Func>
bool funcHasDuration(std::chrono::milliseconds expectedDuration, Func func) {
  const auto slack = expectedDuration / 4;
  for (int attempt = 0; attempt < 100; ++attempt) {
    const auto begin = std::chrono::steady_clock::now();
    func();
    const auto took = std::chrono::steady_clock::now() - begin;
    if (took > expectedDuration - slack && took < expectedDuration + slack) {
      return true;
    }
  }
  return false;
}
233
// While the lock is held, every timed acquisition flavor must both fail
// and consume roughly its requested timeout (funcHasDuration checks a
// +/-25% window with retries).  Also checks that sub-microsecond
// timeouts (1ns..128ns) fail promptly under exclusive and shared holders.
template <typename Lock>
void runFailingTryTimeoutTest() {
  Lock lock;
  lock.lock();
  // exclusive holder: timed exclusive, shared, and upgrade attempts fail
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_for(milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_for(milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_upgrade_for(milliseconds(10)));
  }));
  // same three flavors with absolute steady_clock deadlines
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(steady_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_until(
        steady_clock::now() + milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(
        lock.try_lock_upgrade_until(steady_clock::now() + milliseconds(10)));
  }));
  // same three flavors with absolute system_clock deadlines
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(system_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_until(
        system_clock::now() + milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(
        lock.try_lock_upgrade_until(system_clock::now() + milliseconds(10)));
  }));
  lock.unlock();

  // shared holder: timed exclusive attempts still fail with full timeout
  lock.lock_shared();
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_for(milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(steady_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(system_clock::now() + milliseconds(10)));
  }));
  lock.unlock_shared();

  // nanosecond-scale timeouts against an exclusive holder
  lock.lock();
  for (int p = 0; p < 8; ++p) {
    EXPECT_FALSE(lock.try_lock_for(nanoseconds(1 << p)));
  }
  lock.unlock();

  // nanosecond-scale timeouts against multiple shared holders
  for (int p = 0; p < 8; ++p) {
    typename Lock::ReadHolder holder1(lock);
    typename Lock::ReadHolder holder2(lock);
    typename Lock::ReadHolder holder3(lock);
    EXPECT_FALSE(lock.try_lock_for(nanoseconds(1 << p)));
  }
}

// Run the failing-timeout checks against both priority policies.
TEST(SharedMutex, failing_try_timeout) {
  runFailingTryTimeoutTest<SharedMutexReadPriority>();
  runFailingTryTimeoutTest<SharedMutexWritePriority>();
}
304
// Single-threaded checks of the upgrade-lock state machine: an upgrade
// holder excludes writers but admits readers, converts atomically to
// exclusive or shared, and an exclusive holder can downgrade to upgrade.
template <typename Lock>
void runBasicUpgradeTest() {
  Lock lock;
  typename Lock::Token token1;
  typename Lock::Token token2;

  // upgrade blocks writers but not readers
  lock.lock_upgrade();
  EXPECT_FALSE(lock.try_lock());
  EXPECT_TRUE(lock.try_lock_shared(token1));
  lock.unlock_shared(token1);
  lock.unlock_upgrade();

  // upgrade -> exclusive promotion then blocks readers
  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock();
  EXPECT_FALSE(lock.try_lock_shared(token1));
  lock.unlock();

  // upgrade -> shared conversion frees the upgrade slot, so a second
  // upgrade+conversion can proceed while the first shared lock is held
  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock_shared(token1);
  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock_shared(token2);
  lock.unlock_shared(token1);
  lock.unlock_shared(token2);

  // exclusive -> upgrade downgrade readmits readers
  lock.lock();
  lock.unlock_and_lock_upgrade();
  EXPECT_TRUE(lock.try_lock_shared(token1));
  lock.unlock_upgrade();
  lock.unlock_shared(token1);
}

// Run the upgrade-lock checks against both priority policies.
TEST(SharedMutex, basic_upgrade_tests) {
  runBasicUpgradeTest<SharedMutexReadPriority>();
  runBasicUpgradeTest<SharedMutexWritePriority>();
}
340
341 TEST(SharedMutex, read_has_prio) {
342   SharedMutexReadPriority lock;
343   SharedMutexToken token1;
344   SharedMutexToken token2;
345   lock.lock_shared(token1);
346   bool exclusiveAcquired = false;
347   auto writer = thread([&] {
348     lock.lock();
349     exclusiveAcquired = true;
350     lock.unlock();
351   });
352
353   // lock() can't complete until we unlock token1, but it should stake
354   // its claim with regards to other exclusive or upgrade locks.  We can
355   // use try_lock_upgrade to poll for that eventuality.
356   while (lock.try_lock_upgrade()) {
357     lock.unlock_upgrade();
358     this_thread::yield();
359   }
360   EXPECT_FALSE(exclusiveAcquired);
361
362   // Even though lock() is stuck we should be able to get token2
363   EXPECT_TRUE(lock.try_lock_shared(token2));
364   lock.unlock_shared(token1);
365   lock.unlock_shared(token2);
366   writer.join();
367   EXPECT_TRUE(exclusiveAcquired);
368 }
369
// Verifies writer priority: once a writer is blocked in lock(), new
// shared-lock attempts fail until the writer has had its turn.
TEST(SharedMutex, write_has_prio) {
  SharedMutexWritePriority lock;
  SharedMutexToken token1;
  SharedMutexToken token2;
  lock.lock_shared(token1);
  auto writer = thread([&] {
    lock.lock();
    lock.unlock();
  });

  // eventually lock() should block readers
  while (lock.try_lock_shared(token2)) {
    lock.unlock_shared(token2);
    this_thread::yield();
  }

  // releasing the last shared lock lets the blocked writer proceed
  lock.unlock_shared(token1);
  writer.join();
}
389
390 struct TokenLocker {
391   SharedMutexToken token;
392
393   template <typename T>
394   void lock(T* lock) {
395     lock->lock();
396   }
397
398   template <typename T>
399   void unlock(T* lock) {
400     lock->unlock();
401   }
402
403   template <typename T>
404   void lock_shared(T* lock) {
405     lock->lock_shared(token);
406   }
407
408   template <typename T>
409   void unlock_shared(T* lock) {
410     lock->unlock_shared(token);
411   }
412 };
413
// Locker policy that forwards to the mutex's plain (token-free) API.
struct Locker {
  template <typename T>
  void lock(T* mutex) { mutex->lock(); }

  template <typename T>
  void unlock(T* mutex) { mutex->unlock(); }

  template <typename T>
  void lock_shared(T* mutex) { mutex->lock_shared(); }

  template <typename T>
  void unlock_shared(T* mutex) { mutex->unlock_shared(); }
};
435
// Locker policy for lock types whose exclusive lock takes an index
// argument and whose shared section is entered via enter()/leave()
// (always passing slot 0 here).
struct EnterLocker {
  template <typename T>
  void lock(T* mutex) { mutex->lock(0); }

  template <typename T>
  void unlock(T* mutex) { mutex->unlock(); }

  template <typename T>
  void lock_shared(T* mutex) { mutex->enter(0); }

  template <typename T>
  void unlock_shared(T* mutex) { mutex->leave(); }
};
457
// Thin RAII wrapper exposing pthread_rwlock_t through the Locker-style
// lock/lock_shared interface used by the benchmarks.  Return codes of
// the pthread calls are deliberately ignored, as elsewhere in this file.
struct PosixRWLock {
  pthread_rwlock_t lock_;

  PosixRWLock() {
    pthread_rwlock_init(&lock_, nullptr);
  }

  ~PosixRWLock() {
    pthread_rwlock_destroy(&lock_);
  }

  void lock() {
    pthread_rwlock_wrlock(&lock_);
  }

  void unlock() {
    pthread_rwlock_unlock(&lock_);
  }

  void lock_shared() {
    pthread_rwlock_rdlock(&lock_);
  }

  void unlock_shared() {
    pthread_rwlock_unlock(&lock_);
  }
};
473
// Thin RAII wrapper exposing pthread_mutex_t through the Locker-style
// interface; "shared" acquisition is just the plain mutex, providing a
// no-reader-parallelism baseline for the benchmarks.
struct PosixMutex {
  pthread_mutex_t lock_;

  PosixMutex() {
    pthread_mutex_init(&lock_, nullptr);
  }

  ~PosixMutex() {
    pthread_mutex_destroy(&lock_);
  }

  void lock() {
    pthread_mutex_lock(&lock_);
  }

  void unlock() {
    pthread_mutex_unlock(&lock_);
  }

  void lock_shared() {
    pthread_mutex_lock(&lock_);
  }

  void unlock_shared() {
    pthread_mutex_unlock(&lock_);
  }
};
489
// Benchmark/stress body: numThreads threads repeatedly take and release a
// shared lock, splitting numOps operations round-robin across the
// threads.  If useSeparateLocks is true each thread spins on its own
// private lock (uncontended cost); otherwise all threads contend on
// globalLock.  Atom is std::atomic or DeterministicAtomic so the same
// body serves both benchmarks and DeterministicSchedule tests; thread
// creation happens under BENCHMARK_SUSPEND so setup isn't timed.
template <template <typename> class Atom, typename Lock, typename Locker>
static void runContendedReaders(size_t numOps,
                                size_t numThreads,
                                bool useSeparateLocks) {
  // padding keeps the lock and protected value away from other hot
  // stack data (cache-line isolation)
  char padding1[64];
  (void)padding1;
  Lock globalLock;
  int valueProtectedByLock = 10;
  char padding2[64];
  (void)padding2;
  Atom<bool> go(false);
  Atom<bool>* goPtr = &go; // workaround for clang bug
  vector<thread> threads(numThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < numThreads; ++t) {
      threads[t] = DSched::thread([&, t, numThreads] {
        Lock privateLock;
        Lock* lock = useSeparateLocks ? &privateLock : &globalLock;
        Locker locker;
        // spin until all threads have been created and released at once
        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numThreads) {
          locker.lock_shared(lock);
          // note: folly::doNotOptimizeAway reads and writes to its arg,
          // so the following two lines are very different than a call
          // to folly::doNotOptimizeAway(valueProtectedByLock);
          auto copy = valueProtectedByLock;
          folly::doNotOptimizeAway(copy);
          locker.unlock_shared(lock);
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}
531
// Reader-only benchmark entry points, one per lock implementation; all
// forward to runContendedReaders with the matching locker policy.
// NOTE(review): these take numOps as uint32_t while the mixed-workload
// wrappers below mostly use size_t; runContendedReaders takes size_t, so
// the value is widened implicitly either way.
static void folly_rwspin_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, RWSpinLock, Locker>(
      numOps, numThreads, useSeparateLocks);
}

// SharedMutex (write priority) via the token-based shared-lock API
static void shmtx_wr_pri_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexWritePriority, TokenLocker>(
      numOps, numThreads, useSeparateLocks);
}

// "bare" = token-free shared-lock API
static void shmtx_w_bare_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexWritePriority, Locker>(
      numOps, numThreads, useSeparateLocks);
}

// SharedMutex (read priority) via the token-based shared-lock API
static void shmtx_rd_pri_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexReadPriority, TokenLocker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_r_bare_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexReadPriority, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void folly_ticket_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, RWTicketSpinLock64, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void boost_shared_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, boost::shared_mutex, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void pthrd_rwlock_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, PosixRWLock, Locker>(
      numOps, numThreads, useSeparateLocks);
}
587
// Benchmark/stress body for a mixed reader/writer workload: each thread
// performs its round-robin share of numOps operations, choosing a write
// with probability writeFraction.  Randomness comes from the glibc
// reentrant drand48 family, seeded with the thread index so runs are
// repeatable.  Writers increment valueProtectedByLock under the
// exclusive lock; readers copy it under the shared lock.
// useSeparateLocks and Atom behave as in runContendedReaders.
template <template <typename> class Atom, typename Lock, typename Locker>
static void runMixed(size_t numOps,
                     size_t numThreads,
                     double writeFraction,
                     bool useSeparateLocks) {
  // padding isolates the lock/value pair on their own cache lines
  char padding1[64];
  (void)padding1;
  Lock globalLock;
  int valueProtectedByLock = 0;
  char padding2[64];
  (void)padding2;
  Atom<bool> go(false);
  Atom<bool>* goPtr = &go; // workaround for clang bug
  vector<thread> threads(numThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < numThreads; ++t) {
      threads[t] = DSched::thread([&, t, numThreads] {
        struct drand48_data buffer;
        srand48_r(t, &buffer);
        // lrand48_r yields values in [0, 2^31), so scale the fraction
        // to that range for a single comparison per op
        long writeThreshold = writeFraction * 0x7fffffff;
        Lock privateLock;
        Lock* lock = useSeparateLocks ? &privateLock : &globalLock;
        Locker locker;
        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numThreads) {
          long randVal;
          lrand48_r(&buffer, &randVal);
          bool writeOp = randVal < writeThreshold;
          if (writeOp) {
            locker.lock(lock);
            // only mutate shared state when the lock actually protects it
            if (!useSeparateLocks) {
              ++valueProtectedByLock;
            }
            locker.unlock(lock);
          } else {
            locker.lock_shared(lock);
            auto v = valueProtectedByLock;
            folly::doNotOptimizeAway(v);
            locker.unlock_shared(lock);
          }
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}
641
// Mixed reader/writer benchmark entry points, one per lock
// implementation; all forward to runMixed with the matching locker.
static void folly_rwspin(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, RWSpinLock, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

// SharedMutex (write priority) via the token-based shared-lock API
static void shmtx_wr_pri(uint32_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

// "bare" = token-free shared-lock API
static void shmtx_w_bare(uint32_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, SharedMutexWritePriority, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_rd_pri(uint32_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_r_bare(uint32_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, SharedMutexReadPriority, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void folly_ticket(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, RWTicketSpinLock64, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void boost_shared(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, boost::shared_mutex, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void pthrd_rwlock(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, PosixRWLock, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

// NOTE(review): trailing underscore presumably just distinguishes the
// name from the pthread type / aligns benchmark-table labels — confirm.
static void pthrd_mutex_(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, PosixMutex, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}
713
// Multi-threaded stress test that randomly walks the full SharedMutex
// state machine — exclusive / upgrade / shared holds, token and
// token-free shared locks, blocking and timed acquisition, and every
// unlock_*_and_lock_* transition — while cross-checking shared Atom<>
// counters that mirror how many holders of each kind should currently
// exist.  Each thread also exercises a private lock with shared locks
// only.  Atom is std::atomic (stress) or DeterministicAtomic
// (deterministic schedule).
template <typename Lock, template <typename> class Atom>
static void runAllAndValidate(size_t numOps, size_t numThreads) {
  Lock globalLock;
  Atom<int> globalExclusiveCount(0);
  Atom<int> globalUpgradeCount(0);
  Atom<int> globalSharedCount(0);

  Atom<bool> go(false);

  // clang crashes on access to Atom<> captured by ref in closure
  Atom<int>* globalExclusiveCountPtr = &globalExclusiveCount;
  Atom<int>* globalUpgradeCountPtr = &globalUpgradeCount;
  Atom<int>* globalSharedCountPtr = &globalSharedCount;
  Atom<bool>* goPtr = &go;

  vector<thread> threads(numThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < numThreads; ++t) {
      threads[t] = DSched::thread([&, t, numThreads] {
        // per-thread reentrant RNG seeded by thread index (repeatable)
        struct drand48_data buffer;
        srand48_r(t, &buffer);

        // which kind of global lock (if any) this thread currently holds
        bool exclusive = false;
        bool upgrade = false;
        bool shared = false;
        bool ourGlobalTokenUsed = false;
        SharedMutexToken ourGlobalToken;

        Lock privateLock;
        vector<SharedMutexToken> privateTokens;

        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numThreads) {
          // randVal in [0,1000)
          long randVal;
          lrand48_r(&buffer, &randVal);
          randVal = (long)((randVal * (uint64_t)1000) / 0x7fffffff);

          // make as many assertions as possible about the global state
          if (exclusive) {
            EXPECT_EQ(1, globalExclusiveCountPtr->load(memory_order_acquire));
            EXPECT_EQ(0, globalUpgradeCountPtr->load(memory_order_acquire));
            EXPECT_EQ(0, globalSharedCountPtr->load(memory_order_acquire));
          }
          if (upgrade) {
            EXPECT_EQ(0, globalExclusiveCountPtr->load(memory_order_acquire));
            EXPECT_EQ(1, globalUpgradeCountPtr->load(memory_order_acquire));
          }
          if (shared) {
            EXPECT_EQ(0, globalExclusiveCountPtr->load(memory_order_acquire));
            EXPECT_TRUE(globalSharedCountPtr->load(memory_order_acquire) > 0);
          } else {
            EXPECT_FALSE(ourGlobalTokenUsed);
          }

          // independent 20% chance we do something to the private lock
          if (randVal < 200) {
            // it's okay to take multiple private shared locks because
            // we never take an exclusive lock, so reader versus writer
            // priority doesn't cause deadlocks
            if (randVal < 100 && privateTokens.size() > 0) {
              auto i = randVal % privateTokens.size();
              privateLock.unlock_shared(privateTokens[i]);
              privateTokens.erase(privateTokens.begin() + i);
            } else {
              SharedMutexToken token;
              privateLock.lock_shared(token);
              privateTokens.push_back(token);
            }
            continue;
          }

          // if we've got a lock, the only thing we can do is release it
          // or transform it into a different kind of lock
          if (exclusive) {
            exclusive = false;
            --*globalExclusiveCountPtr;
            if (randVal < 500) {
              globalLock.unlock();
            } else if (randVal < 700) {
              globalLock.unlock_and_lock_shared();
              ++*globalSharedCountPtr;
              shared = true;
            } else if (randVal < 900) {
              globalLock.unlock_and_lock_shared(ourGlobalToken);
              ++*globalSharedCountPtr;
              shared = true;
              ourGlobalTokenUsed = true;
            } else {
              globalLock.unlock_and_lock_upgrade();
              ++*globalUpgradeCountPtr;
              upgrade = true;
            }
          } else if (upgrade) {
            upgrade = false;
            --*globalUpgradeCountPtr;
            if (randVal < 500) {
              globalLock.unlock_upgrade();
            } else if (randVal < 700) {
              globalLock.unlock_upgrade_and_lock_shared();
              ++*globalSharedCountPtr;
              shared = true;
            } else if (randVal < 900) {
              globalLock.unlock_upgrade_and_lock_shared(ourGlobalToken);
              ++*globalSharedCountPtr;
              shared = true;
              ourGlobalTokenUsed = true;
            } else {
              globalLock.unlock_upgrade_and_lock();
              ++*globalExclusiveCountPtr;
              exclusive = true;
            }
          } else if (shared) {
            shared = false;
            --*globalSharedCountPtr;
            if (ourGlobalTokenUsed) {
              globalLock.unlock_shared(ourGlobalToken);
              ourGlobalTokenUsed = false;
            } else {
              globalLock.unlock_shared();
            }
          } else if (randVal < 400) {
            // 40% chance of shared lock with token, 5 ways to get it
            // NOTE(review): randVal is in [200,400) here (values < 200
            // were consumed by the private-lock branch above), so the
            // inner test below is always true and the try_lock_shared*
            // alternatives are dead code — "5 ways" overstates coverage;
            // confirm whether the inner thresholds should be rescaled.

            // delta t goes from -1 millis to 7 millis
            auto dt = microseconds(10 * (randVal - 100));

            if (randVal < 400) {
              globalLock.lock_shared(ourGlobalToken);
              shared = true;
            } else if (randVal < 500) {
              shared = globalLock.try_lock_shared(ourGlobalToken);
            } else if (randVal < 600) {
              shared = globalLock.try_lock_shared_for(dt, ourGlobalToken);
            } else if (randVal < 800) {
              shared = globalLock.try_lock_shared_until(
                  system_clock::now() + dt, ourGlobalToken);
            }
            if (shared) {
              ourGlobalTokenUsed = true;
              ++*globalSharedCountPtr;
            }
          } else if (randVal < 800) {
            // 40% chance of shared lock without token
            // NOTE(review): randVal is in [400,800) here, so the plain
            // lock_shared() branch below is unreachable — confirm intent.
            auto dt = microseconds(10 * (randVal - 100));
            if (randVal < 400) {
              globalLock.lock_shared();
              shared = true;
            } else if (randVal < 500) {
              shared = globalLock.try_lock_shared();
            } else if (randVal < 600) {
              shared = globalLock.try_lock_shared_for(dt);
            } else if (randVal < 800) {
              shared = globalLock.try_lock_shared_until(
                  system_clock::now() + dt);
            }
            if (shared) {
              ++*globalSharedCountPtr;
            }
          } else if (randVal < 900) {
            // 10% chance of upgrade lock
            globalLock.lock_upgrade();
            upgrade = true;
            ++*globalUpgradeCountPtr;
          } else {
            // 10% chance of exclusive lock, 5 ways to get it
            // NOTE(review): randVal is in [900,1000) here, so only the
            // final system_clock try_lock_until branch ever runs — the
            // first four alternatives are dead code; confirm intent.

            // delta t goes from -1 millis to 9 millis
            auto dt = microseconds(100 * (randVal - 910));

            if (randVal < 400) {
              globalLock.lock();
              exclusive = true;
            } else if (randVal < 500) {
              exclusive = globalLock.try_lock();
            } else if (randVal < 600) {
              exclusive = globalLock.try_lock_for(dt);
            } else if (randVal < 700) {
              exclusive = globalLock.try_lock_until(steady_clock::now() + dt);
            } else {
              exclusive = globalLock.try_lock_until(system_clock::now() + dt);
            }
            if (exclusive) {
              ++*globalExclusiveCountPtr;
            }
          }
        }

        // release whatever global lock we still hold, keeping the
        // mirror counters consistent
        if (exclusive) {
          --*globalExclusiveCountPtr;
          globalLock.unlock();
        }
        if (upgrade) {
          --*globalUpgradeCountPtr;
          globalLock.unlock_upgrade();
        }
        if (shared) {
          --*globalSharedCountPtr;
          if (ourGlobalTokenUsed) {
            globalLock.unlock_shared(ourGlobalToken);
            ourGlobalTokenUsed = false;
          } else {
            globalLock.unlock_shared();
          }
        }
        // drain all remaining private shared locks
        for (auto& token : privateTokens) {
          privateLock.unlock_shared(token);
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}
934
935 TEST(SharedMutex, deterministic_concurrent_readers_of_one_lock_read_prio) {
936   for (int pass = 0; pass < 3; ++pass) {
937     DSched sched(DSched::uniform(pass));
938     runContendedReaders<DeterministicAtomic,
939                         DSharedMutexReadPriority,
940                         Locker>(1000, 3, false);
941   }
942 }
943
944 TEST(SharedMutex, deterministic_concurrent_readers_of_one_lock_write_prio) {
945   for (int pass = 0; pass < 3; ++pass) {
946     DSched sched(DSched::uniform(pass));
947     runContendedReaders<DeterministicAtomic,
948                         DSharedMutexWritePriority,
949                         Locker>(1000, 3, false);
950   }
951 }
952
953 TEST(SharedMutex, concurrent_readers_of_one_lock_read_prio) {
954   for (int pass = 0; pass < 10; ++pass) {
955     runContendedReaders<atomic, SharedMutexReadPriority, Locker>(
956         100000, 32, false);
957   }
958 }
959
960 TEST(SharedMutex, concurrent_readers_of_one_lock_write_prio) {
961   for (int pass = 0; pass < 10; ++pass) {
962     runContendedReaders<atomic, SharedMutexWritePriority, Locker>(
963         100000, 32, false);
964   }
965 }
966
967 TEST(SharedMutex, deterministic_readers_of_concurrent_locks_read_prio) {
968   for (int pass = 0; pass < 3; ++pass) {
969     DSched sched(DSched::uniform(pass));
970     runContendedReaders<DeterministicAtomic,
971                         DSharedMutexReadPriority,
972                         Locker>(1000, 3, true);
973   }
974 }
975
976 TEST(SharedMutex, deterministic_readers_of_concurrent_locks_write_prio) {
977   for (int pass = 0; pass < 3; ++pass) {
978     DSched sched(DSched::uniform(pass));
979     runContendedReaders<DeterministicAtomic,
980                         DSharedMutexWritePriority,
981                         Locker>(1000, 3, true);
982   }
983 }
984
985 TEST(SharedMutex, readers_of_concurrent_locks_read_prio) {
986   for (int pass = 0; pass < 10; ++pass) {
987     runContendedReaders<atomic, SharedMutexReadPriority, TokenLocker>(
988         100000, 32, true);
989   }
990 }
991
992 TEST(SharedMutex, readers_of_concurrent_locks_write_prio) {
993   for (int pass = 0; pass < 10; ++pass) {
994     runContendedReaders<atomic, SharedMutexWritePriority, TokenLocker>(
995         100000, 32, true);
996   }
997 }
998
999 TEST(SharedMutex, deterministic_mixed_mostly_read_read_prio) {
1000   for (int pass = 0; pass < 3; ++pass) {
1001     DSched sched(DSched::uniform(pass));
1002     runMixed<DeterministicAtomic, DSharedMutexReadPriority, Locker>(
1003         1000, 3, 0.1, false);
1004   }
1005 }
1006
1007 TEST(SharedMutex, deterministic_mixed_mostly_read_write_prio) {
1008   for (int pass = 0; pass < 3; ++pass) {
1009     DSched sched(DSched::uniform(pass));
1010     runMixed<DeterministicAtomic, DSharedMutexWritePriority, Locker>(
1011         1000, 3, 0.1, false);
1012   }
1013 }
1014
1015 TEST(SharedMutex, mixed_mostly_read_read_prio) {
1016   for (int pass = 0; pass < 5; ++pass) {
1017     runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
1018         10000, 32, 0.1, false);
1019   }
1020 }
1021
1022 TEST(SharedMutex, mixed_mostly_read_write_prio) {
1023   for (int pass = 0; pass < 5; ++pass) {
1024     runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
1025         10000, 32, 0.1, false);
1026   }
1027 }
1028
1029 TEST(SharedMutex, deterministic_mixed_mostly_write_read_prio) {
1030   for (int pass = 0; pass < 1; ++pass) {
1031     DSched sched(DSched::uniform(pass));
1032     runMixed<DeterministicAtomic, DSharedMutexReadPriority, TokenLocker>(
1033         1000, 10, 0.9, false);
1034   }
1035 }
1036
1037 TEST(SharedMutex, deterministic_mixed_mostly_write_write_prio) {
1038   for (int pass = 0; pass < 1; ++pass) {
1039     DSched sched(DSched::uniform(pass));
1040     runMixed<DeterministicAtomic, DSharedMutexWritePriority, TokenLocker>(
1041         1000, 10, 0.9, false);
1042   }
1043 }
1044
1045 TEST(SharedMutex, deterministic_lost_wakeup_write_prio) {
1046   for (int pass = 0; pass < 10; ++pass) {
1047     DSched sched(DSched::uniformSubset(pass, 2, 200));
1048     runMixed<DeterministicAtomic, DSharedMutexWritePriority, TokenLocker>(
1049         1000, 3, 1.0, false);
1050   }
1051 }
1052
1053 TEST(SharedMutex, mixed_mostly_write_read_prio) {
1054   for (int pass = 0; pass < (folly::kIsSanitizeAddress ? 1 : 5); ++pass) {
1055     runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
1056         50000, 300, 0.9, false);
1057   }
1058 }
1059
1060 TEST(SharedMutex, mixed_mostly_write_write_prio) {
1061   for (int pass = 0; pass < (folly::kIsSanitizeAddress ? 1 : 5); ++pass) {
1062     runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
1063         50000, 300, 0.9, false);
1064   }
1065 }
1066
1067 TEST(SharedMutex, deterministic_all_ops_read_prio) {
1068   for (int pass = 0; pass < 5; ++pass) {
1069     DSched sched(DSched::uniform(pass));
1070     runAllAndValidate<DSharedMutexReadPriority, DeterministicAtomic>(1000, 8);
1071   }
1072 }
1073
1074 TEST(SharedMutex, deterministic_all_ops_write_prio) {
1075   for (int pass = 0; pass < 5; ++pass) {
1076     DSched sched(DSched::uniform(pass));
1077     runAllAndValidate<DSharedMutexWritePriority, DeterministicAtomic>(1000, 8);
1078   }
1079 }
1080
1081 TEST(SharedMutex, all_ops_read_prio) {
1082   for (int pass = 0; pass < 5; ++pass) {
1083     runAllAndValidate<SharedMutexReadPriority, atomic>(100000, 32);
1084   }
1085 }
1086
1087 TEST(SharedMutex, all_ops_write_prio) {
1088   for (int pass = 0; pass < 5; ++pass) {
1089     runAllAndValidate<SharedMutexWritePriority, atomic>(100000, 32);
1090   }
1091 }
1092
// Declare the queue element type relocatable so that folly containers (the
// MPMCQueue used below) may move elements with memcpy rather than running
// the move constructor per element.
FOLLY_ASSUME_FBVECTOR_COMPATIBLE(
    boost::optional<boost::optional<SharedMutexToken>>)
1095
1096 // Setup is a set of threads that either grab a shared lock, or exclusive
1097 // and then downgrade it, or upgrade then upgrade and downgrade, then
1098 // enqueue the shared lock to a second set of threads that just performs
1099 // unlocks.  Half of the shared locks use tokens, the others don't.
// Queue element encoding: outer optional empty => EOF marker shared by all
// receivers; inner optional empty => shared lock was taken without a token;
// inner optional engaged => the matching unlock must use that token.
//
// @param numOps               total shared acquisitions across all senders
// @param preWriteFraction     fraction preceded by an exclusive lock that is
//                             then downgraded to shared
// @param preUpgradeFraction   fraction preceded by an upgrade lock (half of
//                             these downgrade directly, half go through an
//                             exclusive lock first)
// @param numSendingThreads    threads that acquire and enqueue shared locks
// @param numReceivingThreads  threads that dequeue and release them
template <typename Lock, template <typename> class Atom>
static void runRemoteUnlock(size_t numOps,
                            double preWriteFraction,
                            double preUpgradeFraction,
                            size_t numSendingThreads,
                            size_t numReceivingThreads) {
  Lock globalLock;
  MPMCQueue<boost::optional<boost::optional<SharedMutexToken>>, Atom>
    queue(10);
  auto queuePtr = &queue; // workaround for clang crash

  Atom<bool> go(false);
  auto goPtr = &go; // workaround for clang crash
  Atom<int> pendingSenders(numSendingThreads);
  auto pendingSendersPtr = &pendingSenders; // workaround for clang crash
  vector<thread> threads(numSendingThreads + numReceivingThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < threads.size(); ++t) {
      threads[t] = DSched::thread([&, t, numSendingThreads] {
        if (t >= numSendingThreads) {
          // we're a receiver
          typename decltype(queue)::value_type elem;
          while (true) {
            queuePtr->blockingRead(elem);
            if (!elem) {
              // EOF, pass the EOF token so the other receivers also stop
              queuePtr->blockingWrite(std::move(elem));
              break;
            }
            // release the shared lock that a sender handed us
            if (*elem) {
              globalLock.unlock_shared(**elem);
            } else {
              globalLock.unlock_shared();
            }
          }
          return;
        }
        // else we're a sender

        // per-thread PRNG, seeded by thread index for reproducibility
        struct drand48_data buffer;
        srand48_r(t, &buffer);

        while (!goPtr->load()) {
          this_thread::yield();
        }
        // senders interleave: thread t handles ops t, t+S, t+2S, ...
        for (size_t op = t; op < numOps; op += numSendingThreads) {
          long unscaledRandVal;
          lrand48_r(&buffer, &unscaledRandVal);

          // randVal in [0,1]
          double randVal = ((double)unscaledRandVal) / 0x7fffffff;

          // extract a bit and rescale: the bit picks token vs tokenless,
          // the remainder selects how the shared lock is obtained
          bool useToken = randVal >= 0.5;
          randVal = (randVal - (useToken ? 0.5 : 0.0)) * 2;

          boost::optional<SharedMutexToken> maybeToken;

          if (useToken) {
            SharedMutexToken token;
            if (randVal < preWriteFraction) {
              globalLock.lock();
              globalLock.unlock_and_lock_shared(token);
            } else if (randVal < preWriteFraction + preUpgradeFraction / 2) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock_shared(token);
            } else if (randVal < preWriteFraction + preUpgradeFraction) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock();
              globalLock.unlock_and_lock_shared(token);
            } else {
              globalLock.lock_shared(token);
            }
            maybeToken = token;
          } else {
            if (randVal < preWriteFraction) {
              globalLock.lock();
              globalLock.unlock_and_lock_shared();
            } else if (randVal < preWriteFraction + preUpgradeFraction / 2) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock_shared();
            } else if (randVal < preWriteFraction + preUpgradeFraction) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock();
              globalLock.unlock_and_lock_shared();
            } else {
              globalLock.lock_shared();
            }
          }

          // blockingWrite is emplace-like, so this automatically adds
          // another level of wrapping
          queuePtr->blockingWrite(maybeToken);
        }
        // last sender to finish posts the single EOF marker
        if (--*pendingSendersPtr == 0) {
          queuePtr->blockingWrite(boost::none);
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}
1207
1208 TEST(SharedMutex, deterministic_remote_write_prio) {
1209   for (int pass = 0; pass < 1; ++pass) {
1210     DSched sched(DSched::uniform(pass));
1211     runRemoteUnlock<DSharedMutexWritePriority, DeterministicAtomic>(
1212         500, 0.1, 0.1, 5, 5);
1213   }
1214 }
1215
1216 TEST(SharedMutex, deterministic_remote_read_prio) {
1217   for (int pass = 0; pass < 1; ++pass) {
1218     DSched sched(DSched::uniform(pass));
1219     runRemoteUnlock<DSharedMutexReadPriority, DeterministicAtomic>(
1220         500, 0.1, 0.1, 5, 5);
1221   }
1222 }
1223
1224 TEST(SharedMutex, remote_write_prio) {
1225   for (int pass = 0; pass < 10; ++pass) {
1226     runRemoteUnlock<SharedMutexWritePriority, atomic>(100000, 0.1, 0.1, 5, 5);
1227   }
1228 }
1229
1230 TEST(SharedMutex, remote_read_prio) {
1231   for (int pass = 0; pass < (folly::kIsSanitizeAddress ? 1 : 100); ++pass) {
1232     runRemoteUnlock<SharedMutexReadPriority, atomic>(100000, 0.1, 0.1, 5, 5);
1233   }
1234 }
1235
1236 static void burn(size_t n) {
1237   for (size_t i = 0; i < n; ++i) {
1238     folly::doNotOptimizeAway(i);
1239   }
1240 }
1241
1242 // Two threads and three locks, arranged so that they have to proceed
1243 // in turn with reader/writer conflict
// Thread 0 holds exclusive locks and thread 1 holds shared locks, rotating
// through the three locks so that each iteration must wait for the other
// thread to release the lock it needs next.
//
// @param numRounds  handoff iterations performed by each thread
// @param burnCount  busy-work spins per iteration (simulated hold time)
template <typename Lock, template <typename> class Atom = atomic>
static void runPingPong(size_t numRounds, size_t burnCount) {
  // padding keeps each lock on its own cache line
  char padding1[56];
  (void)padding1;
  pair<Lock, char[56]> locks[3];
  char padding2[56];
  (void)padding2;

  Atom<int> avail(0);
  auto availPtr = &avail; // workaround for clang crash
  Atom<bool> go(false);
  auto goPtr = &go; // workaround for clang crash
  vector<thread> threads(2);

  // seed the rotation: two locks held exclusively, one held shared
  locks[0].first.lock();
  locks[1].first.lock();
  locks[2].first.lock_shared();

  BENCHMARK_SUSPEND {
    threads[0] = DSched::thread([&] {
      ++*availPtr;
      while (!goPtr->load()) {
        this_thread::yield();
      }
      // exclusive side: release the oldest lock, acquire the next one
      for (size_t i = 0; i < numRounds; ++i) {
        locks[i % 3].first.unlock();
        locks[(i + 2) % 3].first.lock();
        burn(burnCount);
      }
    });
    threads[1] = DSched::thread([&] {
      ++*availPtr;
      while (!goPtr->load()) {
        this_thread::yield();
      }
      // shared side: chases the exclusive thread around the ring
      for (size_t i = 0; i < numRounds; ++i) {
        locks[i % 3].first.lock_shared();
        burn(burnCount);
        locks[(i + 2) % 3].first.unlock_shared();
      }
    });

    // wait for both threads to be spun up before the benchmark clock runs
    while (avail.load() < 2) {
      this_thread::yield();
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
  // release whatever is still held after the final round
  locks[numRounds % 3].first.unlock();
  locks[(numRounds + 1) % 3].first.unlock();
  locks[(numRounds + 2) % 3].first.unlock_shared();
}
1299
1300 static void folly_rwspin_ping_pong(size_t n, size_t scale, size_t burnCount) {
1301   runPingPong<RWSpinLock>(n / scale, burnCount);
1302 }
1303
1304 static void shmtx_w_bare_ping_pong(size_t n, size_t scale, size_t burnCount) {
1305   runPingPong<SharedMutexWritePriority>(n / scale, burnCount);
1306 }
1307
1308 static void shmtx_r_bare_ping_pong(size_t n, size_t scale, size_t burnCount) {
1309   runPingPong<SharedMutexReadPriority>(n / scale, burnCount);
1310 }
1311
1312 static void folly_ticket_ping_pong(size_t n, size_t scale, size_t burnCount) {
1313   runPingPong<RWTicketSpinLock64>(n / scale, burnCount);
1314 }
1315
1316 static void boost_shared_ping_pong(size_t n, size_t scale, size_t burnCount) {
1317   runPingPong<boost::shared_mutex>(n / scale, burnCount);
1318 }
1319
1320 static void pthrd_rwlock_ping_pong(size_t n, size_t scale, size_t burnCount) {
1321   runPingPong<PosixRWLock>(n / scale, burnCount);
1322 }
1323
1324 TEST(SharedMutex, deterministic_ping_pong_write_prio) {
1325   for (int pass = 0; pass < 1; ++pass) {
1326     DSched sched(DSched::uniform(pass));
1327     runPingPong<DSharedMutexWritePriority, DeterministicAtomic>(500, 0);
1328   }
1329 }
1330
1331 TEST(SharedMutex, deterministic_ping_pong_read_prio) {
1332   for (int pass = 0; pass < 1; ++pass) {
1333     DSched sched(DSched::uniform(pass));
1334     runPingPong<DSharedMutexReadPriority, DeterministicAtomic>(500, 0);
1335   }
1336 }
1337
1338 TEST(SharedMutex, ping_pong_write_prio) {
1339   for (int pass = 0; pass < 1; ++pass) {
1340     runPingPong<SharedMutexWritePriority, atomic>(50000, 0);
1341   }
1342 }
1343
1344 TEST(SharedMutex, ping_pong_read_prio) {
1345   for (int pass = 0; pass < 1; ++pass) {
1346     runPingPong<SharedMutexReadPriority, atomic>(50000, 0);
1347   }
1348 }
1349
1350 // This is here so you can tell how much of the runtime reported by the
1351 // more complex harnesses is due to the harness, although due to the
1352 // magic of compiler optimization it may also be slower
1353 BENCHMARK(single_thread_lock_shared_unlock_shared, iters) {
1354   SharedMutex lock;
1355   for (size_t n = 0; n < iters; ++n) {
1356     SharedMutex::Token token;
1357     lock.lock_shared(token);
1358     folly::doNotOptimizeAway(0);
1359     lock.unlock_shared(token);
1360   }
1361 }
1362
1363 BENCHMARK(single_thread_lock_unlock, iters) {
1364   SharedMutex lock;
1365   for (size_t n = 0; n < iters; ++n) {
1366     lock.lock();
1367     folly::doNotOptimizeAway(0);
1368     lock.unlock();
1369   }
1370 }
1371
// Shorthand for the parameterized-benchmark tables below: BENCH_BASE
// registers the baseline entry of a group, BENCH_REL registers an entry
// whose result is reported relative to that baseline.
#define BENCH_BASE(...) FB_VA_GLUE(BENCHMARK_NAMED_PARAM, (__VA_ARGS__))
#define BENCH_REL(...) FB_VA_GLUE(BENCHMARK_RELATIVE_NAMED_PARAM, (__VA_ARGS__))
1374
// 100% reads.  Best-case scenario for deferred locks.  Lock is colocated
// with read data, so inline lock takes cache miss every time but deferred
// lock has only cache hits and local access.
// Each group below repeats the comparison at a higher thread count; the
// trailing args appear to be (numThreads, useSeparateLocks) — the driver
// functions are defined elsewhere in this file.
BENCHMARK_DRAW_LINE()
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_reads, 1thread, 1, false)
BENCH_REL (shmtx_wr_pri_reads, 1thread, 1, false)
BENCH_REL (shmtx_w_bare_reads, 1thread, 1, false)
BENCH_REL (shmtx_rd_pri_reads, 1thread, 1, false)
BENCH_REL (shmtx_r_bare_reads, 1thread, 1, false)
BENCH_REL (folly_ticket_reads, 1thread, 1, false)
BENCH_REL (boost_shared_reads, 1thread, 1, false)
BENCH_REL (pthrd_rwlock_reads, 1thread, 1, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_reads, 2thread, 2, false)
BENCH_REL (shmtx_wr_pri_reads, 2thread, 2, false)
BENCH_REL (shmtx_w_bare_reads, 2thread, 2, false)
BENCH_REL (shmtx_rd_pri_reads, 2thread, 2, false)
BENCH_REL (shmtx_r_bare_reads, 2thread, 2, false)
BENCH_REL (folly_ticket_reads, 2thread, 2, false)
BENCH_REL (boost_shared_reads, 2thread, 2, false)
BENCH_REL (pthrd_rwlock_reads, 2thread, 2, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_reads, 4thread, 4, false)
BENCH_REL (shmtx_wr_pri_reads, 4thread, 4, false)
BENCH_REL (shmtx_w_bare_reads, 4thread, 4, false)
BENCH_REL (shmtx_rd_pri_reads, 4thread, 4, false)
BENCH_REL (shmtx_r_bare_reads, 4thread, 4, false)
BENCH_REL (folly_ticket_reads, 4thread, 4, false)
BENCH_REL (boost_shared_reads, 4thread, 4, false)
BENCH_REL (pthrd_rwlock_reads, 4thread, 4, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_reads, 8thread, 8, false)
BENCH_REL (shmtx_wr_pri_reads, 8thread, 8, false)
BENCH_REL (shmtx_w_bare_reads, 8thread, 8, false)
BENCH_REL (shmtx_rd_pri_reads, 8thread, 8, false)
BENCH_REL (shmtx_r_bare_reads, 8thread, 8, false)
BENCH_REL (folly_ticket_reads, 8thread, 8, false)
BENCH_REL (boost_shared_reads, 8thread, 8, false)
BENCH_REL (pthrd_rwlock_reads, 8thread, 8, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_reads, 16thread, 16, false)
BENCH_REL (shmtx_wr_pri_reads, 16thread, 16, false)
BENCH_REL (shmtx_w_bare_reads, 16thread, 16, false)
BENCH_REL (shmtx_rd_pri_reads, 16thread, 16, false)
BENCH_REL (shmtx_r_bare_reads, 16thread, 16, false)
BENCH_REL (folly_ticket_reads, 16thread, 16, false)
BENCH_REL (boost_shared_reads, 16thread, 16, false)
BENCH_REL (pthrd_rwlock_reads, 16thread, 16, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_reads, 32thread, 32, false)
BENCH_REL (shmtx_wr_pri_reads, 32thread, 32, false)
BENCH_REL (shmtx_w_bare_reads, 32thread, 32, false)
BENCH_REL (shmtx_rd_pri_reads, 32thread, 32, false)
BENCH_REL (shmtx_r_bare_reads, 32thread, 32, false)
BENCH_REL (folly_ticket_reads, 32thread, 32, false)
BENCH_REL (boost_shared_reads, 32thread, 32, false)
BENCH_REL (pthrd_rwlock_reads, 32thread, 32, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_reads, 64thread, 64, false)
BENCH_REL (shmtx_wr_pri_reads, 64thread, 64, false)
BENCH_REL (shmtx_w_bare_reads, 64thread, 64, false)
BENCH_REL (shmtx_rd_pri_reads, 64thread, 64, false)
BENCH_REL (shmtx_r_bare_reads, 64thread, 64, false)
BENCH_REL (folly_ticket_reads, 64thread, 64, false)
BENCH_REL (boost_shared_reads, 64thread, 64, false)
BENCH_REL (pthrd_rwlock_reads, 64thread, 64, false)
1442
// 1 lock used by everybody, 100% writes.  Threads only hurt, but it is
// good to not fail catastrophically.  Compare to single_thread_lock_unlock
// to see the overhead of the generic driver (and its pseudo-random number
// generator).  pthrd_mutex_ is a pthread_mutex_t (default, not adaptive),
// which is better than any of the reader-writer locks for this scenario.
// Trailing args appear to be (numThreads, writeFraction, useSeparateLocks).
BENCHMARK_DRAW_LINE()
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 1thread_all_write, 1, 1.0, false)
BENCH_REL (shmtx_wr_pri, 1thread_all_write, 1, 1.0, false)
BENCH_REL (shmtx_rd_pri, 1thread_all_write, 1, 1.0, false)
BENCH_REL (folly_ticket, 1thread_all_write, 1, 1.0, false)
BENCH_REL (boost_shared, 1thread_all_write, 1, 1.0, false)
BENCH_REL (pthrd_rwlock, 1thread_all_write, 1, 1.0, false)
BENCH_REL (pthrd_mutex_, 1thread_all_write, 1, 1.0, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thread_all_write, 2, 1.0, false)
BENCH_REL (shmtx_wr_pri, 2thread_all_write, 2, 1.0, false)
BENCH_REL (shmtx_rd_pri, 2thread_all_write, 2, 1.0, false)
BENCH_REL (folly_ticket, 2thread_all_write, 2, 1.0, false)
BENCH_REL (boost_shared, 2thread_all_write, 2, 1.0, false)
BENCH_REL (pthrd_rwlock, 2thread_all_write, 2, 1.0, false)
BENCH_REL (pthrd_mutex_, 2thread_all_write, 2, 1.0, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 4thread_all_write, 4, 1.0, false)
BENCH_REL (shmtx_wr_pri, 4thread_all_write, 4, 1.0, false)
BENCH_REL (shmtx_rd_pri, 4thread_all_write, 4, 1.0, false)
BENCH_REL (folly_ticket, 4thread_all_write, 4, 1.0, false)
BENCH_REL (boost_shared, 4thread_all_write, 4, 1.0, false)
BENCH_REL (pthrd_rwlock, 4thread_all_write, 4, 1.0, false)
BENCH_REL (pthrd_mutex_, 4thread_all_write, 4, 1.0, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 8thread_all_write, 8, 1.0, false)
BENCH_REL (shmtx_wr_pri, 8thread_all_write, 8, 1.0, false)
BENCH_REL (shmtx_rd_pri, 8thread_all_write, 8, 1.0, false)
BENCH_REL (folly_ticket, 8thread_all_write, 8, 1.0, false)
BENCH_REL (boost_shared, 8thread_all_write, 8, 1.0, false)
BENCH_REL (pthrd_rwlock, 8thread_all_write, 8, 1.0, false)
BENCH_REL (pthrd_mutex_, 8thread_all_write, 8, 1.0, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 16thread_all_write, 16, 1.0, false)
BENCH_REL (shmtx_wr_pri, 16thread_all_write, 16, 1.0, false)
BENCH_REL (shmtx_rd_pri, 16thread_all_write, 16, 1.0, false)
BENCH_REL (folly_ticket, 16thread_all_write, 16, 1.0, false)
BENCH_REL (boost_shared, 16thread_all_write, 16, 1.0, false)
BENCH_REL (pthrd_rwlock, 16thread_all_write, 16, 1.0, false)
BENCH_REL (pthrd_mutex_, 16thread_all_write, 16, 1.0, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 32thread_all_write, 32, 1.0, false)
BENCH_REL (shmtx_wr_pri, 32thread_all_write, 32, 1.0, false)
BENCH_REL (shmtx_rd_pri, 32thread_all_write, 32, 1.0, false)
BENCH_REL (folly_ticket, 32thread_all_write, 32, 1.0, false)
BENCH_REL (boost_shared, 32thread_all_write, 32, 1.0, false)
BENCH_REL (pthrd_rwlock, 32thread_all_write, 32, 1.0, false)
BENCH_REL (pthrd_mutex_, 32thread_all_write, 32, 1.0, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 64thread_all_write, 64, 1.0, false)
BENCH_REL (shmtx_wr_pri, 64thread_all_write, 64, 1.0, false)
BENCH_REL (shmtx_rd_pri, 64thread_all_write, 64, 1.0, false)
BENCH_REL (folly_ticket, 64thread_all_write, 64, 1.0, false)
BENCH_REL (boost_shared, 64thread_all_write, 64, 1.0, false)
BENCH_REL (pthrd_rwlock, 64thread_all_write, 64, 1.0, false)
BENCH_REL (pthrd_mutex_, 64thread_all_write, 64, 1.0, false)
1505
// 1 lock used by everybody, 10% writes.  Not much scaling to be had.  Perf
// is best at 1 thread, once you've got multiple threads > 8 threads hurts.
// Trailing args appear to be (numThreads, writeFraction, useSeparateLocks).
BENCHMARK_DRAW_LINE()
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL (shmtx_wr_pri, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL (shmtx_rd_pri, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL (folly_ticket, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL (boost_shared, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL (pthrd_rwlock, 1thread_10pct_write, 1, 0.10, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL (shmtx_wr_pri, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL (shmtx_rd_pri, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL (folly_ticket, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL (boost_shared, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL (pthrd_rwlock, 2thread_10pct_write, 2, 0.10, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL (shmtx_wr_pri, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL (shmtx_rd_pri, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL (folly_ticket, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL (boost_shared, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL (pthrd_rwlock, 4thread_10pct_write, 4, 0.10, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL (shmtx_wr_pri, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL (shmtx_rd_pri, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL (folly_ticket, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL (boost_shared, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL (pthrd_rwlock, 8thread_10pct_write, 8, 0.10, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL (shmtx_wr_pri, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL (shmtx_rd_pri, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL (folly_ticket, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL (boost_shared, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL (pthrd_rwlock, 16thread_10pct_write, 16, 0.10, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL (shmtx_wr_pri, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL (shmtx_rd_pri, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL (folly_ticket, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL (boost_shared, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL (pthrd_rwlock, 32thread_10pct_write, 32, 0.10, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL (shmtx_wr_pri, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL (shmtx_rd_pri, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL (folly_ticket, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL (boost_shared, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL (pthrd_rwlock, 64thread_10pct_write, 64, 0.10, false)
1558
// 1 lock used by everybody, 1% writes.  This is a more realistic example
// than the concurrent_*_reads benchmark, but still shows SharedMutex locks
// winning over all of the others
// Trailing args appear to be (numThreads, writeFraction, useSeparateLocks).
BENCHMARK_DRAW_LINE()
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL (shmtx_wr_pri, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL (shmtx_w_bare, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL (shmtx_rd_pri, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL (shmtx_r_bare, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL (folly_ticket, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL (boost_shared, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL (pthrd_rwlock, 1thread_1pct_write, 1, 0.01, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL (shmtx_wr_pri, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL (shmtx_w_bare, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL (shmtx_rd_pri, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL (shmtx_r_bare, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL (folly_ticket, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL (boost_shared, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL (pthrd_rwlock, 2thread_1pct_write, 2, 0.01, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL (shmtx_wr_pri, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL (shmtx_w_bare, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL (shmtx_rd_pri, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL (shmtx_r_bare, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL (folly_ticket, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL (boost_shared, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL (pthrd_rwlock, 4thread_1pct_write, 4, 0.01, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL (shmtx_wr_pri, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL (shmtx_w_bare, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL (shmtx_rd_pri, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL (shmtx_r_bare, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL (folly_ticket, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL (boost_shared, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL (pthrd_rwlock, 8thread_1pct_write, 8, 0.01, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL (shmtx_wr_pri, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL (shmtx_w_bare, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL (shmtx_rd_pri, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL (shmtx_r_bare, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL (folly_ticket, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL (boost_shared, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL (pthrd_rwlock, 16thread_1pct_write, 16, 0.01, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL (shmtx_wr_pri, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL (shmtx_w_bare, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL (shmtx_rd_pri, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL (shmtx_r_bare, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL (folly_ticket, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL (boost_shared, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL (pthrd_rwlock, 32thread_1pct_write, 32, 0.01, false)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL (shmtx_wr_pri, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL (shmtx_w_bare, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL (shmtx_rd_pri, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL (shmtx_r_bare, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL (folly_ticket, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL (boost_shared, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL (pthrd_rwlock, 64thread_1pct_write, 64, 0.01, false)
1626
// Worst case scenario for deferred locks. No actual sharing, likely that
// read operations will have to first set the kDeferredReadersPossibleBit,
// and likely that writers will have to scan deferredReaders[].
// Each thread gets its own lock here (last arg true); trailing args appear
// to be (numThreads, writeFraction, useSeparateLocks).
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_REL (shmtx_wr_pri, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_REL (shmtx_rd_pri, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_REL (shmtx_wr_pri, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_REL (shmtx_rd_pri, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_BASE(folly_rwspin, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_REL (shmtx_wr_pri, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_REL (shmtx_rd_pri, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_BASE(folly_rwspin, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_REL (shmtx_wr_pri, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_REL (shmtx_rd_pri, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_BASE(folly_rwspin, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_REL (shmtx_wr_pri, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_REL (shmtx_rd_pri, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_BASE(folly_rwspin, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCH_REL (shmtx_wr_pri, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCH_REL (shmtx_rd_pri, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_REL (shmtx_wr_pri, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_REL (shmtx_rd_pri, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_REL (shmtx_wr_pri, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_REL (shmtx_rd_pri, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_BASE(folly_rwspin, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_REL (shmtx_wr_pri, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_REL (shmtx_rd_pri, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_BASE(folly_rwspin, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_REL (shmtx_wr_pri, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_REL (shmtx_rd_pri, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_BASE(folly_rwspin, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_REL (shmtx_wr_pri, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_REL (shmtx_rd_pri, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_BASE(folly_rwspin, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCH_REL (shmtx_wr_pri, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCH_REL (shmtx_rd_pri, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_REL (shmtx_wr_pri, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_REL (shmtx_rd_pri, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_1pct_write, 4, 0.01, true)
1673 BENCH_REL (shmtx_wr_pri, 4thr_4lock_1pct_write, 4, 0.01, true)
1674 BENCH_REL (shmtx_rd_pri, 4thr_4lock_1pct_write, 4, 0.01, true)
1675 BENCH_BASE(folly_rwspin, 8thr_8lock_1pct_write, 8, 0.01, true)
1676 BENCH_REL (shmtx_wr_pri, 8thr_8lock_1pct_write, 8, 0.01, true)
1677 BENCH_REL (shmtx_rd_pri, 8thr_8lock_1pct_write, 8, 0.01, true)
1678 BENCH_BASE(folly_rwspin, 16thr_16lock_1pct_write, 16, 0.01, true)
1679 BENCH_REL (shmtx_wr_pri, 16thr_16lock_1pct_write, 16, 0.01, true)
1680 BENCH_REL (shmtx_rd_pri, 16thr_16lock_1pct_write, 16, 0.01, true)
1681 BENCH_BASE(folly_rwspin, 32thr_32lock_1pct_write, 32, 0.01, true)
1682 BENCH_REL (shmtx_wr_pri, 32thr_32lock_1pct_write, 32, 0.01, true)
1683 BENCH_REL (shmtx_rd_pri, 32thr_32lock_1pct_write, 32, 0.01, true)
1684 BENCH_BASE(folly_rwspin, 64thr_64lock_1pct_write, 64, 0.01, true)
1685 BENCH_REL (shmtx_wr_pri, 64thr_64lock_1pct_write, 64, 0.01, true)
1686 BENCH_REL (shmtx_rd_pri, 64thr_64lock_1pct_write, 64, 0.01, true)
1687
// Ping-pong tests have a scaled number of iterations, because their burn
// loop would make them too slow otherwise.  Ping-pong with burn count of
// 100k or 300k shows the advantage of soft-spin, reducing the cost of
// each wakeup by about 20 usec.  (Take benchmark reported difference,
// ~400 nanos, multiply by the scale of 100, then divide by 2 because
// each round has two wakeups.)
//
// Arguments are (scale, burnCount): reported time/iter is multiplied by
// `scale`, and each handoff spins for `burnCount` iterations of busy work.
BENCHMARK_DRAW_LINE()
BENCHMARK_DRAW_LINE()
// no burn loop between handoffs
BENCH_BASE(folly_rwspin_ping_pong, burn0, 1, 0)
BENCH_REL (shmtx_w_bare_ping_pong, burn0, 1, 0)
BENCH_REL (shmtx_r_bare_ping_pong, burn0, 1, 0)
BENCH_REL (folly_ticket_ping_pong, burn0, 1, 0)
BENCH_REL (boost_shared_ping_pong, burn0, 1, 0)
BENCH_REL (pthrd_rwlock_ping_pong, burn0, 1, 0)
BENCHMARK_DRAW_LINE()
// 100k burn iterations per handoff, results scaled by 100
BENCH_BASE(folly_rwspin_ping_pong, burn100k, 100, 100000)
BENCH_REL (shmtx_w_bare_ping_pong, burn100k, 100, 100000)
BENCH_REL (shmtx_r_bare_ping_pong, burn100k, 100, 100000)
BENCH_REL (folly_ticket_ping_pong, burn100k, 100, 100000)
BENCH_REL (boost_shared_ping_pong, burn100k, 100, 100000)
BENCH_REL (pthrd_rwlock_ping_pong, burn100k, 100, 100000)
BENCHMARK_DRAW_LINE()
// 300k burn iterations per handoff, results scaled by 100
BENCH_BASE(folly_rwspin_ping_pong, burn300k, 100, 300000)
BENCH_REL (shmtx_w_bare_ping_pong, burn300k, 100, 300000)
BENCH_REL (shmtx_r_bare_ping_pong, burn300k, 100, 300000)
BENCH_REL (folly_ticket_ping_pong, burn300k, 100, 300000)
BENCH_REL (boost_shared_ping_pong, burn300k, 100, 300000)
BENCH_REL (pthrd_rwlock_ping_pong, burn300k, 100, 300000)
BENCHMARK_DRAW_LINE()
// 1M burn iterations per handoff, results scaled by 1000
BENCH_BASE(folly_rwspin_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (shmtx_w_bare_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (shmtx_r_bare_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (folly_ticket_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (boost_shared_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (pthrd_rwlock_ping_pong, burn1M, 1000, 1000000)
1723
// Reproduce these numbers (about 10 minutes of runtime) with
//   sudo nice -n -20
//     shared_mutex_test --benchmark --bm_min_iters=1000000
1727 //
// Comparisons use folly::RWSpinLock as the baseline, with the
1729 // following row being the default SharedMutex (using *Holder or
1730 // Token-ful methods).
1731 //
1732 // Following results on 2-socket Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
1733 //
1734 // ============================================================================
1735 // folly/test/SharedMutexTest.cpp                  relative  time/iter  iters/s
1736 // ============================================================================
1737 // single_thread_lock_shared_unlock_shared                     25.17ns   39.74M
1738 // single_thread_lock_unlock                                   25.88ns   38.64M
1739 // ----------------------------------------------------------------------------
1740 // ----------------------------------------------------------------------------
1741 // folly_rwspin_reads(1thread)                                 15.16ns   65.95M
1742 // shmtx_wr_pri_reads(1thread)                       69.18%    21.92ns   45.63M
1743 // shmtx_w_bare_reads(1thread)                       56.07%    27.04ns   36.98M
1744 // shmtx_rd_pri_reads(1thread)                       69.06%    21.95ns   45.55M
1745 // shmtx_r_bare_reads(1thread)                       56.36%    26.90ns   37.17M
1746 // folly_ticket_reads(1thread)                       57.56%    26.34ns   37.96M
1747 // boost_shared_reads(1thread)                       10.55%   143.72ns    6.96M
1748 // pthrd_rwlock_reads(1thread)                       39.61%    38.28ns   26.12M
1749 // ----------------------------------------------------------------------------
1750 // folly_rwspin_reads(2thread)                                 45.05ns   22.20M
1751 // shmtx_wr_pri_reads(2thread)                      379.98%    11.86ns   84.34M
1752 // shmtx_w_bare_reads(2thread)                      319.27%    14.11ns   70.87M
1753 // shmtx_rd_pri_reads(2thread)                      385.59%    11.68ns   85.59M
1754 // shmtx_r_bare_reads(2thread)                      306.56%    14.70ns   68.04M
1755 // folly_ticket_reads(2thread)                       61.07%    73.78ns   13.55M
1756 // boost_shared_reads(2thread)                       13.54%   332.66ns    3.01M
1757 // pthrd_rwlock_reads(2thread)                       34.22%   131.65ns    7.60M
1758 // ----------------------------------------------------------------------------
1759 // folly_rwspin_reads(4thread)                                 62.19ns   16.08M
1760 // shmtx_wr_pri_reads(4thread)                     1022.82%     6.08ns  164.48M
1761 // shmtx_w_bare_reads(4thread)                      875.37%     7.10ns  140.76M
1762 // shmtx_rd_pri_reads(4thread)                     1060.46%     5.86ns  170.53M
1763 // shmtx_r_bare_reads(4thread)                      879.88%     7.07ns  141.49M
1764 // folly_ticket_reads(4thread)                       64.62%    96.23ns   10.39M
1765 // boost_shared_reads(4thread)                       14.86%   418.49ns    2.39M
1766 // pthrd_rwlock_reads(4thread)                       25.01%   248.65ns    4.02M
1767 // ----------------------------------------------------------------------------
1768 // folly_rwspin_reads(8thread)                                 64.09ns   15.60M
1769 // shmtx_wr_pri_reads(8thread)                     2191.99%     2.92ns  342.03M
1770 // shmtx_w_bare_reads(8thread)                     1804.92%     3.55ns  281.63M
1771 // shmtx_rd_pri_reads(8thread)                     2194.60%     2.92ns  342.44M
1772 // shmtx_r_bare_reads(8thread)                     1800.53%     3.56ns  280.95M
1773 // folly_ticket_reads(8thread)                       54.90%   116.74ns    8.57M
1774 // boost_shared_reads(8thread)                       18.25%   351.24ns    2.85M
1775 // pthrd_rwlock_reads(8thread)                       28.19%   227.31ns    4.40M
1776 // ----------------------------------------------------------------------------
1777 // folly_rwspin_reads(16thread)                                70.06ns   14.27M
1778 // shmtx_wr_pri_reads(16thread)                    4970.09%     1.41ns  709.38M
1779 // shmtx_w_bare_reads(16thread)                    4143.75%     1.69ns  591.44M
1780 // shmtx_rd_pri_reads(16thread)                    5009.31%     1.40ns  714.98M
1781 // shmtx_r_bare_reads(16thread)                    4067.36%     1.72ns  580.54M
1782 // folly_ticket_reads(16thread)                      46.78%   149.77ns    6.68M
1783 // boost_shared_reads(16thread)                      21.67%   323.37ns    3.09M
1784 // pthrd_rwlock_reads(16thread)                      35.05%   199.90ns    5.00M
1785 // ----------------------------------------------------------------------------
1786 // folly_rwspin_reads(32thread)                                58.83ns   17.00M
1787 // shmtx_wr_pri_reads(32thread)                    5158.37%     1.14ns  876.79M
1788 // shmtx_w_bare_reads(32thread)                    4246.03%     1.39ns  721.72M
1789 // shmtx_rd_pri_reads(32thread)                    4845.97%     1.21ns  823.69M
1790 // shmtx_r_bare_reads(32thread)                    4721.44%     1.25ns  802.52M
1791 // folly_ticket_reads(32thread)                      28.40%   207.15ns    4.83M
1792 // boost_shared_reads(32thread)                      17.08%   344.54ns    2.90M
1793 // pthrd_rwlock_reads(32thread)                      30.01%   196.02ns    5.10M
1794 // ----------------------------------------------------------------------------
1795 // folly_rwspin_reads(64thread)                                59.19ns   16.89M
1796 // shmtx_wr_pri_reads(64thread)                    3804.54%     1.56ns  642.76M
1797 // shmtx_w_bare_reads(64thread)                    3625.06%     1.63ns  612.43M
1798 // shmtx_rd_pri_reads(64thread)                    3418.19%     1.73ns  577.48M
1799 // shmtx_r_bare_reads(64thread)                    3416.98%     1.73ns  577.28M
1800 // folly_ticket_reads(64thread)                      30.53%   193.90ns    5.16M
1801 // boost_shared_reads(64thread)                      18.59%   318.47ns    3.14M
1802 // pthrd_rwlock_reads(64thread)                      31.35%   188.81ns    5.30M
1803 // ----------------------------------------------------------------------------
1804 // ----------------------------------------------------------------------------
1805 // folly_rwspin(1thread_all_write)                             23.77ns   42.06M
1806 // shmtx_wr_pri(1thread_all_write)                   85.09%    27.94ns   35.79M
1807 // shmtx_rd_pri(1thread_all_write)                   85.32%    27.87ns   35.89M
1808 // folly_ticket(1thread_all_write)                   88.11%    26.98ns   37.06M
1809 // boost_shared(1thread_all_write)                   16.49%   144.14ns    6.94M
1810 // pthrd_rwlock(1thread_all_write)                   53.99%    44.04ns   22.71M
1811 // pthrd_mutex_(1thread_all_write)                   86.05%    27.63ns   36.20M
1812 // ----------------------------------------------------------------------------
1813 // folly_rwspin(2thread_all_write)                             76.05ns   13.15M
1814 // shmtx_wr_pri(2thread_all_write)                   60.67%   125.35ns    7.98M
1815 // shmtx_rd_pri(2thread_all_write)                   60.36%   125.99ns    7.94M
1816 // folly_ticket(2thread_all_write)                  129.10%    58.91ns   16.98M
1817 // boost_shared(2thread_all_write)                   18.65%   407.74ns    2.45M
1818 // pthrd_rwlock(2thread_all_write)                   40.90%   185.92ns    5.38M
1819 // pthrd_mutex_(2thread_all_write)                  127.37%    59.71ns   16.75M
1820 // ----------------------------------------------------------------------------
1821 // folly_rwspin(4thread_all_write)                            207.17ns    4.83M
1822 // shmtx_wr_pri(4thread_all_write)                  119.42%   173.49ns    5.76M
1823 // shmtx_rd_pri(4thread_all_write)                  117.68%   176.05ns    5.68M
1824 // folly_ticket(4thread_all_write)                  182.39%   113.59ns    8.80M
1825 // boost_shared(4thread_all_write)                   11.98%     1.73us  578.46K
1826 // pthrd_rwlock(4thread_all_write)                   27.50%   753.25ns    1.33M
1827 // pthrd_mutex_(4thread_all_write)                  117.75%   175.95ns    5.68M
1828 // ----------------------------------------------------------------------------
1829 // folly_rwspin(8thread_all_write)                            326.50ns    3.06M
1830 // shmtx_wr_pri(8thread_all_write)                  125.47%   260.22ns    3.84M
1831 // shmtx_rd_pri(8thread_all_write)                  124.73%   261.76ns    3.82M
1832 // folly_ticket(8thread_all_write)                  253.39%   128.85ns    7.76M
1833 // boost_shared(8thread_all_write)                    6.36%     5.13us  194.87K
1834 // pthrd_rwlock(8thread_all_write)                   38.54%   847.09ns    1.18M
1835 // pthrd_mutex_(8thread_all_write)                  166.31%   196.32ns    5.09M
1836 // ----------------------------------------------------------------------------
1837 // folly_rwspin(16thread_all_write)                           729.89ns    1.37M
1838 // shmtx_wr_pri(16thread_all_write)                 219.91%   331.91ns    3.01M
1839 // shmtx_rd_pri(16thread_all_write)                 220.09%   331.62ns    3.02M
1840 // folly_ticket(16thread_all_write)                 390.06%   187.12ns    5.34M
1841 // boost_shared(16thread_all_write)                  10.27%     7.11us  140.72K
1842 // pthrd_rwlock(16thread_all_write)                 113.90%   640.84ns    1.56M
1843 // pthrd_mutex_(16thread_all_write)                 401.97%   181.58ns    5.51M
1844 // ----------------------------------------------------------------------------
1845 // folly_rwspin(32thread_all_write)                             1.55us  645.01K
1846 // shmtx_wr_pri(32thread_all_write)                 415.05%   373.54ns    2.68M
1847 // shmtx_rd_pri(32thread_all_write)                 258.45%   599.88ns    1.67M
1848 // folly_ticket(32thread_all_write)                 525.40%   295.09ns    3.39M
1849 // boost_shared(32thread_all_write)                  20.84%     7.44us  134.45K
1850 // pthrd_rwlock(32thread_all_write)                 254.16%   610.00ns    1.64M
1851 // pthrd_mutex_(32thread_all_write)                 852.51%   181.86ns    5.50M
1852 // ----------------------------------------------------------------------------
1853 // folly_rwspin(64thread_all_write)                             2.03us  492.00K
1854 // shmtx_wr_pri(64thread_all_write)                 517.65%   392.64ns    2.55M
1855 // shmtx_rd_pri(64thread_all_write)                 288.20%   705.24ns    1.42M
1856 // folly_ticket(64thread_all_write)                 638.22%   318.47ns    3.14M
1857 // boost_shared(64thread_all_write)                  27.56%     7.37us  135.61K
1858 // pthrd_rwlock(64thread_all_write)                 326.75%   622.04ns    1.61M
1859 // pthrd_mutex_(64thread_all_write)                1231.57%   165.04ns    6.06M
1860 // ----------------------------------------------------------------------------
1861 // ----------------------------------------------------------------------------
1862 // folly_rwspin(1thread_10pct_write)                           19.39ns   51.58M
1863 // shmtx_wr_pri(1thread_10pct_write)                 93.87%    20.65ns   48.42M
1864 // shmtx_rd_pri(1thread_10pct_write)                 93.60%    20.71ns   48.28M
1865 // folly_ticket(1thread_10pct_write)                 73.75%    26.29ns   38.04M
1866 // boost_shared(1thread_10pct_write)                 12.97%   149.53ns    6.69M
1867 // pthrd_rwlock(1thread_10pct_write)                 44.15%    43.92ns   22.77M
1868 // ----------------------------------------------------------------------------
1869 // folly_rwspin(2thread_10pct_write)                          227.88ns    4.39M
1870 // shmtx_wr_pri(2thread_10pct_write)                321.08%    70.98ns   14.09M
1871 // shmtx_rd_pri(2thread_10pct_write)                280.65%    81.20ns   12.32M
1872 // folly_ticket(2thread_10pct_write)                220.43%   103.38ns    9.67M
1873 // boost_shared(2thread_10pct_write)                 58.78%   387.71ns    2.58M
1874 // pthrd_rwlock(2thread_10pct_write)                112.68%   202.23ns    4.94M
1875 // ----------------------------------------------------------------------------
1876 // folly_rwspin(4thread_10pct_write)                          444.94ns    2.25M
1877 // shmtx_wr_pri(4thread_10pct_write)                470.35%    94.60ns   10.57M
1878 // shmtx_rd_pri(4thread_10pct_write)                349.08%   127.46ns    7.85M
1879 // folly_ticket(4thread_10pct_write)                305.64%   145.58ns    6.87M
1880 // boost_shared(4thread_10pct_write)                 44.43%     1.00us  998.57K
1881 // pthrd_rwlock(4thread_10pct_write)                100.59%   442.31ns    2.26M
1882 // ----------------------------------------------------------------------------
1883 // folly_rwspin(8thread_10pct_write)                          424.67ns    2.35M
1884 // shmtx_wr_pri(8thread_10pct_write)                337.53%   125.82ns    7.95M
1885 // shmtx_rd_pri(8thread_10pct_write)                232.32%   182.79ns    5.47M
1886 // folly_ticket(8thread_10pct_write)                206.59%   205.56ns    4.86M
1887 // boost_shared(8thread_10pct_write)                 19.45%     2.18us  457.90K
1888 // pthrd_rwlock(8thread_10pct_write)                 78.58%   540.42ns    1.85M
1889 // ----------------------------------------------------------------------------
1890 // folly_rwspin(16thread_10pct_write)                         727.04ns    1.38M
1891 // shmtx_wr_pri(16thread_10pct_write)               400.60%   181.49ns    5.51M
1892 // shmtx_rd_pri(16thread_10pct_write)               312.94%   232.33ns    4.30M
1893 // folly_ticket(16thread_10pct_write)               283.67%   256.30ns    3.90M
1894 // boost_shared(16thread_10pct_write)                15.87%     4.58us  218.32K
1895 // pthrd_rwlock(16thread_10pct_write)               131.28%   553.82ns    1.81M
1896 // ----------------------------------------------------------------------------
1897 // folly_rwspin(32thread_10pct_write)                         810.61ns    1.23M
1898 // shmtx_wr_pri(32thread_10pct_write)               429.61%   188.68ns    5.30M
1899 // shmtx_rd_pri(32thread_10pct_write)               321.13%   252.42ns    3.96M
1900 // folly_ticket(32thread_10pct_write)               247.65%   327.32ns    3.06M
1901 // boost_shared(32thread_10pct_write)                 8.34%     9.71us  102.94K
1902 // pthrd_rwlock(32thread_10pct_write)               144.28%   561.85ns    1.78M
1903 // ----------------------------------------------------------------------------
1904 // folly_rwspin(64thread_10pct_write)                           1.10us  912.30K
1905 // shmtx_wr_pri(64thread_10pct_write)               486.68%   225.22ns    4.44M
1906 // shmtx_rd_pri(64thread_10pct_write)               412.96%   265.43ns    3.77M
1907 // folly_ticket(64thread_10pct_write)               280.23%   391.15ns    2.56M
1908 // boost_shared(64thread_10pct_write)                 6.16%    17.79us   56.22K
1909 // pthrd_rwlock(64thread_10pct_write)               198.81%   551.34ns    1.81M
1910 // ----------------------------------------------------------------------------
1911 // ----------------------------------------------------------------------------
1912 // folly_rwspin(1thread_1pct_write)                            19.02ns   52.57M
1913 // shmtx_wr_pri(1thread_1pct_write)                  94.46%    20.14ns   49.66M
1914 // shmtx_w_bare(1thread_1pct_write)                  76.60%    24.83ns   40.27M
1915 // shmtx_rd_pri(1thread_1pct_write)                  93.83%    20.27ns   49.33M
1916 // shmtx_r_bare(1thread_1pct_write)                  77.04%    24.69ns   40.50M
1917 // folly_ticket(1thread_1pct_write)                  72.83%    26.12ns   38.29M
1918 // boost_shared(1thread_1pct_write)                  12.48%   152.44ns    6.56M
1919 // pthrd_rwlock(1thread_1pct_write)                  42.85%    44.39ns   22.53M
1920 // ----------------------------------------------------------------------------
1921 // folly_rwspin(2thread_1pct_write)                           110.63ns    9.04M
1922 // shmtx_wr_pri(2thread_1pct_write)                 442.12%    25.02ns   39.96M
1923 // shmtx_w_bare(2thread_1pct_write)                 374.65%    29.53ns   33.86M
1924 // shmtx_rd_pri(2thread_1pct_write)                 371.08%    29.81ns   33.54M
1925 // shmtx_r_bare(2thread_1pct_write)                 138.02%    80.15ns   12.48M
1926 // folly_ticket(2thread_1pct_write)                 131.34%    84.23ns   11.87M
1927 // boost_shared(2thread_1pct_write)                  30.35%   364.58ns    2.74M
1928 // pthrd_rwlock(2thread_1pct_write)                  95.48%   115.87ns    8.63M
1929 // ----------------------------------------------------------------------------
1930 // folly_rwspin(4thread_1pct_write)                           140.62ns    7.11M
1931 // shmtx_wr_pri(4thread_1pct_write)                 627.13%    22.42ns   44.60M
1932 // shmtx_w_bare(4thread_1pct_write)                 552.94%    25.43ns   39.32M
1933 // shmtx_rd_pri(4thread_1pct_write)                 226.06%    62.21ns   16.08M
1934 // shmtx_r_bare(4thread_1pct_write)                  77.61%   181.19ns    5.52M
1935 // folly_ticket(4thread_1pct_write)                 119.58%   117.60ns    8.50M
1936 // boost_shared(4thread_1pct_write)                  25.36%   554.54ns    1.80M
1937 // pthrd_rwlock(4thread_1pct_write)                  45.55%   308.72ns    3.24M
1938 // ----------------------------------------------------------------------------
1939 // folly_rwspin(8thread_1pct_write)                           166.23ns    6.02M
1940 // shmtx_wr_pri(8thread_1pct_write)                 687.09%    24.19ns   41.33M
1941 // shmtx_w_bare(8thread_1pct_write)                 611.80%    27.17ns   36.80M
1942 // shmtx_rd_pri(8thread_1pct_write)                 140.37%   118.43ns    8.44M
1943 // shmtx_r_bare(8thread_1pct_write)                  80.32%   206.97ns    4.83M
1944 // folly_ticket(8thread_1pct_write)                 117.06%   142.01ns    7.04M
1945 // boost_shared(8thread_1pct_write)                  22.29%   745.67ns    1.34M
1946 // pthrd_rwlock(8thread_1pct_write)                  49.84%   333.55ns    3.00M
1947 // ----------------------------------------------------------------------------
1948 // folly_rwspin(16thread_1pct_write)                          419.79ns    2.38M
1949 // shmtx_wr_pri(16thread_1pct_write)               1397.92%    30.03ns   33.30M
1950 // shmtx_w_bare(16thread_1pct_write)               1324.60%    31.69ns   31.55M
1951 // shmtx_rd_pri(16thread_1pct_write)                278.12%   150.94ns    6.63M
1952 // shmtx_r_bare(16thread_1pct_write)                194.25%   216.11ns    4.63M
1953 // folly_ticket(16thread_1pct_write)                255.38%   164.38ns    6.08M
1954 // boost_shared(16thread_1pct_write)                 33.71%     1.25us  803.01K
1955 // pthrd_rwlock(16thread_1pct_write)                131.96%   318.12ns    3.14M
1956 // ----------------------------------------------------------------------------
1957 // folly_rwspin(32thread_1pct_write)                          395.99ns    2.53M
1958 // shmtx_wr_pri(32thread_1pct_write)               1332.76%    29.71ns   33.66M
1959 // shmtx_w_bare(32thread_1pct_write)               1208.86%    32.76ns   30.53M
1960 // shmtx_rd_pri(32thread_1pct_write)                252.97%   156.54ns    6.39M
1961 // shmtx_r_bare(32thread_1pct_write)                193.79%   204.35ns    4.89M
1962 // folly_ticket(32thread_1pct_write)                173.16%   228.69ns    4.37M
1963 // boost_shared(32thread_1pct_write)                 17.00%     2.33us  429.40K
1964 // pthrd_rwlock(32thread_1pct_write)                129.88%   304.89ns    3.28M
1965 // ----------------------------------------------------------------------------
1966 // folly_rwspin(64thread_1pct_write)                          424.07ns    2.36M
1967 // shmtx_wr_pri(64thread_1pct_write)               1297.89%    32.67ns   30.61M
1968 // shmtx_w_bare(64thread_1pct_write)               1228.88%    34.51ns   28.98M
1969 // shmtx_rd_pri(64thread_1pct_write)                270.40%   156.83ns    6.38M
1970 // shmtx_r_bare(64thread_1pct_write)                218.05%   194.48ns    5.14M
1971 // folly_ticket(64thread_1pct_write)                171.44%   247.36ns    4.04M
1972 // boost_shared(64thread_1pct_write)                 10.60%     4.00us  249.95K
1973 // pthrd_rwlock(64thread_1pct_write)                143.80%   294.91ns    3.39M
1974 // ----------------------------------------------------------------------------
1975 // folly_rwspin(2thr_2lock_50pct_write)                        10.87ns   91.99M
1976 // shmtx_wr_pri(2thr_2lock_50pct_write)              83.71%    12.99ns   77.01M
1977 // shmtx_rd_pri(2thr_2lock_50pct_write)              84.08%    12.93ns   77.34M
1978 // folly_rwspin(4thr_4lock_50pct_write)                         5.32ns  188.12M
1979 // shmtx_wr_pri(4thr_4lock_50pct_write)              82.21%     6.47ns  154.65M
1980 // shmtx_rd_pri(4thr_4lock_50pct_write)              81.20%     6.55ns  152.75M
1981 // folly_rwspin(8thr_8lock_50pct_write)                         2.64ns  379.06M
1982 // shmtx_wr_pri(8thr_8lock_50pct_write)              81.26%     3.25ns  308.03M
1983 // shmtx_rd_pri(8thr_8lock_50pct_write)              80.95%     3.26ns  306.86M
1984 // folly_rwspin(16thr_16lock_50pct_write)                       1.52ns  656.77M
1985 // shmtx_wr_pri(16thr_16lock_50pct_write)            86.24%     1.77ns  566.41M
1986 // shmtx_rd_pri(16thr_16lock_50pct_write)            83.72%     1.82ns  549.82M
1987 // folly_rwspin(32thr_32lock_50pct_write)                       1.19ns  841.03M
1988 // shmtx_wr_pri(32thr_32lock_50pct_write)            85.08%     1.40ns  715.55M
1989 // shmtx_rd_pri(32thr_32lock_50pct_write)            86.44%     1.38ns  727.00M
1990 // folly_rwspin(64thr_64lock_50pct_write)                       1.46ns  684.28M
1991 // shmtx_wr_pri(64thr_64lock_50pct_write)            84.53%     1.73ns  578.43M
1992 // shmtx_rd_pri(64thr_64lock_50pct_write)            82.80%     1.76ns  566.58M
1993 // ----------------------------------------------------------------------------
1994 // folly_rwspin(2thr_2lock_10pct_write)                        10.01ns   99.85M
1995 // shmtx_wr_pri(2thr_2lock_10pct_write)              92.02%    10.88ns   91.88M
1996 // shmtx_rd_pri(2thr_2lock_10pct_write)              92.35%    10.84ns   92.22M
1997 // folly_rwspin(4thr_4lock_10pct_write)                         4.81ns  207.87M
1998 // shmtx_wr_pri(4thr_4lock_10pct_write)              89.32%     5.39ns  185.67M
1999 // shmtx_rd_pri(4thr_4lock_10pct_write)              88.96%     5.41ns  184.93M
2000 // folly_rwspin(8thr_8lock_10pct_write)                         2.39ns  417.62M
2001 // shmtx_wr_pri(8thr_8lock_10pct_write)              91.17%     2.63ns  380.76M
2002 // shmtx_rd_pri(8thr_8lock_10pct_write)              89.53%     2.67ns  373.92M
2003 // folly_rwspin(16thr_16lock_10pct_write)                       1.16ns  860.47M
2004 // shmtx_wr_pri(16thr_16lock_10pct_write)            74.35%     1.56ns  639.77M
2005 // shmtx_rd_pri(16thr_16lock_10pct_write)            91.34%     1.27ns  785.97M
2006 // folly_rwspin(32thr_32lock_10pct_write)                       1.15ns  866.23M
2007 // shmtx_wr_pri(32thr_32lock_10pct_write)            92.32%     1.25ns  799.72M
2008 // shmtx_rd_pri(32thr_32lock_10pct_write)            94.40%     1.22ns  817.71M
2009 // folly_rwspin(64thr_64lock_10pct_write)                       1.41ns  710.54M
2010 // shmtx_wr_pri(64thr_64lock_10pct_write)            94.14%     1.50ns  668.88M
2011 // shmtx_rd_pri(64thr_64lock_10pct_write)            94.80%     1.48ns  673.56M
2012 // ----------------------------------------------------------------------------
2013 // folly_rwspin(2thr_2lock_1pct_write)                          9.58ns  104.36M
2014 // shmtx_wr_pri(2thr_2lock_1pct_write)               92.00%    10.42ns   96.01M
2015 // shmtx_rd_pri(2thr_2lock_1pct_write)               91.79%    10.44ns   95.79M
2016 // folly_rwspin(4thr_4lock_1pct_write)                          4.71ns  212.30M
2017 // shmtx_wr_pri(4thr_4lock_1pct_write)               90.37%     5.21ns  191.85M
2018 // shmtx_rd_pri(4thr_4lock_1pct_write)               89.94%     5.24ns  190.95M
2019 // folly_rwspin(8thr_8lock_1pct_write)                          2.33ns  429.91M
2020 // shmtx_wr_pri(8thr_8lock_1pct_write)               90.67%     2.57ns  389.80M
2021 // shmtx_rd_pri(8thr_8lock_1pct_write)               90.61%     2.57ns  389.55M
2022 // folly_rwspin(16thr_16lock_1pct_write)                        1.10ns  905.23M
2023 // shmtx_wr_pri(16thr_16lock_1pct_write)             91.96%     1.20ns  832.46M
2024 // shmtx_rd_pri(16thr_16lock_1pct_write)             92.29%     1.20ns  835.42M
2025 // folly_rwspin(32thr_32lock_1pct_write)                        1.14ns  879.85M
2026 // shmtx_wr_pri(32thr_32lock_1pct_write)             93.41%     1.22ns  821.86M
2027 // shmtx_rd_pri(32thr_32lock_1pct_write)             94.18%     1.21ns  828.66M
2028 // folly_rwspin(64thr_64lock_1pct_write)                        1.34ns  748.83M
2029 // shmtx_wr_pri(64thr_64lock_1pct_write)             94.39%     1.41ns  706.84M
2030 // shmtx_rd_pri(64thr_64lock_1pct_write)             94.02%     1.42ns  704.06M
2031 // ----------------------------------------------------------------------------
2032 // ----------------------------------------------------------------------------
2033 // folly_rwspin_ping_pong(burn0)                              605.63ns    1.65M
2034 // shmtx_w_bare_ping_pong(burn0)                    102.17%   592.76ns    1.69M
2035 // shmtx_r_bare_ping_pong(burn0)                     88.75%   682.44ns    1.47M
2036 // folly_ticket_ping_pong(burn0)                     63.92%   947.56ns    1.06M
2037 // boost_shared_ping_pong(burn0)                      8.52%     7.11us  140.73K
2038 // pthrd_rwlock_ping_pong(burn0)                      7.88%     7.68us  130.15K
2039 // ----------------------------------------------------------------------------
2040 // folly_rwspin_ping_pong(burn100k)                           727.76ns    1.37M
2041 // shmtx_w_bare_ping_pong(burn100k)                 100.79%   722.09ns    1.38M
2042 // shmtx_r_bare_ping_pong(burn100k)                 101.98%   713.61ns    1.40M
2043 // folly_ticket_ping_pong(burn100k)                 102.80%   707.95ns    1.41M
2044 // boost_shared_ping_pong(burn100k)                  81.49%   893.02ns    1.12M
2045 // pthrd_rwlock_ping_pong(burn100k)                  71.05%     1.02us  976.30K
2046 // ----------------------------------------------------------------------------
2047 // folly_rwspin_ping_pong(burn300k)                             2.11us  473.46K
2048 // shmtx_w_bare_ping_pong(burn300k)                 100.06%     2.11us  473.72K
2049 // shmtx_r_bare_ping_pong(burn300k)                  98.93%     2.13us  468.39K
2050 // folly_ticket_ping_pong(burn300k)                  96.68%     2.18us  457.73K
2051 // boost_shared_ping_pong(burn300k)                  84.72%     2.49us  401.13K
2052 // pthrd_rwlock_ping_pong(burn300k)                  84.62%     2.50us  400.66K
2053 // ----------------------------------------------------------------------------
2054 // folly_rwspin_ping_pong(burn1M)                             709.70ns    1.41M
2055 // shmtx_w_bare_ping_pong(burn1M)                   100.28%   707.73ns    1.41M
2056 // shmtx_r_bare_ping_pong(burn1M)                    99.63%   712.37ns    1.40M
2057 // folly_ticket_ping_pong(burn1M)                   100.09%   709.05ns    1.41M
2058 // boost_shared_ping_pong(burn1M)                    94.09%   754.29ns    1.33M
2059 // pthrd_rwlock_ping_pong(burn1M)                    96.32%   736.82ns    1.36M
2060 // ============================================================================
2061
2062 int main(int argc, char** argv) {
2063   (void)folly_rwspin_reads;
2064   (void)shmtx_wr_pri_reads;
2065   (void)shmtx_w_bare_reads;
2066   (void)shmtx_rd_pri_reads;
2067   (void)shmtx_r_bare_reads;
2068   (void)folly_ticket_reads;
2069   (void)boost_shared_reads;
2070   (void)pthrd_rwlock_reads;
2071   (void)folly_rwspin;
2072   (void)shmtx_wr_pri;
2073   (void)shmtx_w_bare;
2074   (void)shmtx_rd_pri;
2075   (void)shmtx_r_bare;
2076   (void)folly_ticket;
2077   (void)boost_shared;
2078   (void)pthrd_rwlock;
2079   (void)pthrd_mutex_;
2080   (void)folly_rwspin_ping_pong;
2081   (void)shmtx_w_bare_ping_pong;
2082   (void)shmtx_r_bare_ping_pong;
2083   (void)folly_ticket_ping_pong;
2084   (void)boost_shared_ping_pong;
2085   (void)pthrd_rwlock_ping_pong;
2086
2087   testing::InitGoogleTest(&argc, argv);
2088   gflags::ParseCommandLineFlags(&argc, &argv, true);
2089   int rv = RUN_ALL_TESTS();
2090   folly::runBenchmarksOnFlag();
2091   return rv;
2092 }