d18e3b6e263592fa87ce91801b898c74ba71877d
[folly.git] / folly / test / SharedMutexTest.cpp
1 /*
2  * Copyright 2017 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <folly/SharedMutex.h>
18
19 #include <stdlib.h>
20 #include <thread>
21 #include <vector>
22
23 #include <boost/optional.hpp>
24 #include <boost/thread/shared_mutex.hpp>
25
26 #include <folly/Benchmark.h>
27 #include <folly/MPMCQueue.h>
28 #include <folly/RWSpinLock.h>
29 #include <folly/portability/GFlags.h>
30 #include <folly/portability/GTest.h>
31 #include <folly/test/DeterministicSchedule.h>
32
33 using namespace folly;
34 using namespace folly::test;
35 using namespace std;
36 using namespace chrono;
37
38 typedef DeterministicSchedule DSched;
39 typedef SharedMutexImpl<true, void, DeterministicAtomic, true>
40     DSharedMutexReadPriority;
41 typedef SharedMutexImpl<false, void, DeterministicAtomic, true>
42     DSharedMutexWritePriority;
43
44 template <typename Lock>
45 void runBasicTest() {
46   Lock lock;
47   SharedMutexToken token1;
48   SharedMutexToken token2;
49   SharedMutexToken token3;
50
51   EXPECT_TRUE(lock.try_lock());
52   EXPECT_FALSE(lock.try_lock());
53   EXPECT_FALSE(lock.try_lock_shared(token1));
54   lock.unlock();
55
56   EXPECT_TRUE(lock.try_lock_shared(token1));
57   EXPECT_FALSE(lock.try_lock());
58   EXPECT_TRUE(lock.try_lock_shared(token2));
59   lock.lock_shared(token3);
60   lock.unlock_shared(token3);
61   lock.unlock_shared(token2);
62   lock.unlock_shared(token1);
63
64   lock.lock();
65   lock.unlock();
66
67   lock.lock_shared(token1);
68   lock.lock_shared(token2);
69   lock.unlock_shared(token1);
70   lock.unlock_shared(token2);
71
72   lock.lock();
73   lock.unlock_and_lock_shared(token1);
74   lock.lock_shared(token2);
75   lock.unlock_shared(token2);
76   lock.unlock_shared(token1);
77 }
78
// Runs the basic lock/try-lock/shared suite against both priority policies.
TEST(SharedMutex, basic) {
  runBasicTest<SharedMutexReadPriority>();
  runBasicTest<SharedMutexWritePriority>();
}
83
// Exercises the RAII holder types (WriteHolder / UpgradeHolder /
// ReadHolder): construction, move-transfer of ownership, and the lock
// conversions (downgrade / promote) performed by the cross-type move
// constructors.  The try_lock probes after each step verify which mode is
// actually held.
template <typename Lock>
void runBasicHoldersTest() {
  Lock lock;
  SharedMutexToken token;

  {
    // create an exclusive write lock via holder
    typename Lock::WriteHolder holder(lock);
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // move ownership to another write holder via move constructor
    typename Lock::WriteHolder holder2(std::move(holder));
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // move ownership to another write holder via assign operator
    typename Lock::WriteHolder holder3;
    holder3 = std::move(holder2);
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // downgrade from exclusive to upgrade lock via move constructor
    typename Lock::UpgradeHolder holder4(std::move(holder3));

    // ensure we can lock from a shared source (upgrade admits readers)
    EXPECT_FALSE(lock.try_lock());
    EXPECT_TRUE(lock.try_lock_shared(token));
    lock.unlock_shared(token);

    // promote from upgrade to exclusive lock via move constructor
    typename Lock::WriteHolder holder5(std::move(holder4));
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // downgrade exclusive to shared lock via move constructor
    typename Lock::ReadHolder holder6(std::move(holder5));

    // ensure we can lock from another shared source
    EXPECT_FALSE(lock.try_lock());
    EXPECT_TRUE(lock.try_lock_shared(token));
    lock.unlock_shared(token);
    // holder6 releases the shared lock at scope exit
  }

  {
    typename Lock::WriteHolder holder(lock);
    EXPECT_FALSE(lock.try_lock());
  }

  {
    // multiple read holders plus one upgrade holder may coexist
    typename Lock::ReadHolder holder(lock);
    typename Lock::ReadHolder holder2(lock);
    typename Lock::UpgradeHolder holder3(lock);
  }

  {
    // upgrade holder can be converted to a read holder while another
    // reader is active
    typename Lock::UpgradeHolder holder(lock);
    typename Lock::ReadHolder holder2(lock);
    typename Lock::ReadHolder holder3(std::move(holder));
  }
}
145
// Runs the RAII-holder suite against both priority policies.
TEST(SharedMutex, basic_holders) {
  runBasicHoldersTest<SharedMutexReadPriority>();
  runBasicHoldersTest<SharedMutexWritePriority>();
}
150
151 template <typename Lock>
152 void runManyReadLocksTestWithTokens() {
153   Lock lock;
154
155   vector<SharedMutexToken> tokens;
156   for (int i = 0; i < 1000; ++i) {
157     tokens.emplace_back();
158     EXPECT_TRUE(lock.try_lock_shared(tokens.back()));
159   }
160   for (auto& token : tokens) {
161     lock.unlock_shared(token);
162   }
163   EXPECT_TRUE(lock.try_lock());
164   lock.unlock();
165 }
166
// 1000 concurrent token-based readers must not overflow reader accounting.
TEST(SharedMutex, many_read_locks_with_tokens) {
  runManyReadLocksTestWithTokens<SharedMutexReadPriority>();
  runManyReadLocksTestWithTokens<SharedMutexWritePriority>();
}
171
// Same shape as the token-based variant, but uses the token-free shared
// lock API: many simultaneous readers in, same number out, then the lock
// must be exclusively acquirable.
template <typename Lock>
void runManyReadLocksTestWithoutTokens() {
  Lock mutex;

  constexpr int kReaders = 1000;
  for (int i = 0; i < kReaders; ++i) {
    EXPECT_TRUE(mutex.try_lock_shared());
  }
  for (int i = 0; i < kReaders; ++i) {
    mutex.unlock_shared();
  }
  EXPECT_TRUE(mutex.try_lock());
  mutex.unlock();
}
185
// 1000 concurrent tokenless readers must not overflow reader accounting.
TEST(SharedMutex, many_read_locks_without_tokens) {
  runManyReadLocksTestWithoutTokens<SharedMutexReadPriority>();
  runManyReadLocksTestWithoutTokens<SharedMutexWritePriority>();
}
190
// Timed acquisitions with a zero or negative timeout, or a deadline that is
// already in the past, must still succeed immediately when the lock is
// uncontended (both steady_clock and system_clock deadlines are covered).
template <typename Lock>
void runTimeoutInPastTest() {
  Lock lock;

  EXPECT_TRUE(lock.try_lock_for(milliseconds(0)));
  lock.unlock();
  EXPECT_TRUE(lock.try_lock_for(milliseconds(-1)));
  lock.unlock();
  EXPECT_TRUE(lock.try_lock_shared_for(milliseconds(0)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_shared_for(milliseconds(-1)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_until(system_clock::now() - milliseconds(1)));
  lock.unlock();
  EXPECT_TRUE(
      lock.try_lock_shared_until(system_clock::now() - milliseconds(1)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_until(steady_clock::now() - milliseconds(1)));
  lock.unlock();
  EXPECT_TRUE(
      lock.try_lock_shared_until(steady_clock::now() - milliseconds(1)));
  lock.unlock_shared();
}
214
// Past deadlines / non-positive timeouts succeed on an uncontended lock.
TEST(SharedMutex, timeout_in_past) {
  runTimeoutInPastTest<SharedMutexReadPriority>();
  runTimeoutInPastTest<SharedMutexWritePriority>();
}
219
// Repeatedly invokes func, returning true as soon as one invocation's
// wall-clock duration lands within +/- 25% of expectedDuration.  Timing on
// a shared machine is noisy, so up to 100 attempts are made before the
// check is declared failed.
template <class Func>
bool funcHasDuration(std::chrono::milliseconds expectedDuration, Func func) {
  const auto lowerBound = expectedDuration - expectedDuration / 4;
  const auto upperBound = expectedDuration + expectedDuration / 4;
  for (int attempt = 0; attempt < 100; ++attempt) {
    const auto begin = std::chrono::steady_clock::now();
    func();
    const auto took = std::chrono::steady_clock::now() - begin;
    if (took > lowerBound && took < upperBound) {
      return true;
    }
  }
  return false;
}
234
// While the lock is held in a conflicting mode, every timed acquisition
// variant (duration-based and deadline-based, steady_clock and
// system_clock, for exclusive / shared / upgrade modes) must fail, and must
// take roughly the requested 10ms doing so (verified via funcHasDuration's
// +/- 25% window with retries).
template <typename Lock>
void runFailingTryTimeoutTest() {
  Lock lock;
  lock.lock();
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_for(milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_for(milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_upgrade_for(milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(steady_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_until(
        steady_clock::now() + milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(
        lock.try_lock_upgrade_until(steady_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(system_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_until(
        system_clock::now() + milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(
        lock.try_lock_upgrade_until(system_clock::now() + milliseconds(10)));
  }));
  lock.unlock();

  // a shared hold conflicts only with exclusive acquisition attempts
  lock.lock_shared();
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_for(milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(steady_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(system_clock::now() + milliseconds(10)));
  }));
  lock.unlock_shared();

  // very short (nanosecond-scale) timeouts must still fail cleanly rather
  // than hang or spuriously succeed while the lock is held exclusively...
  lock.lock();
  for (int p = 0; p < 8; ++p) {
    EXPECT_FALSE(lock.try_lock_for(nanoseconds(1 << p)));
  }
  lock.unlock();

  // ...and likewise while multiple readers hold the lock
  for (int p = 0; p < 8; ++p) {
    typename Lock::ReadHolder holder1(lock);
    typename Lock::ReadHolder holder2(lock);
    typename Lock::ReadHolder holder3(lock);
    EXPECT_FALSE(lock.try_lock_for(nanoseconds(1 << p)));
  }
}
300
// Timed acquisitions under conflict must fail after ~the requested timeout.
TEST(SharedMutex, failing_try_timeout) {
  runFailingTryTimeoutTest<SharedMutexReadPriority>();
  runFailingTryTimeoutTest<SharedMutexWritePriority>();
}
305
// Exercises upgrade-lock semantics: an upgrade hold blocks writers but
// admits readers, and can be atomically converted to/from exclusive and
// shared modes.
template <typename Lock>
void runBasicUpgradeTest() {
  Lock lock;
  typename Lock::Token token1;
  typename Lock::Token token2;

  // upgrade excludes writers but not readers
  lock.lock_upgrade();
  EXPECT_FALSE(lock.try_lock());
  EXPECT_TRUE(lock.try_lock_shared(token1));
  lock.unlock_shared(token1);
  lock.unlock_upgrade();

  // promoting upgrade -> exclusive then excludes readers too
  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock();
  EXPECT_FALSE(lock.try_lock_shared(token1));
  lock.unlock();

  // upgrade can be downgraded to shared; a second upgrade+downgrade cycle
  // can proceed while the first shared hold is still outstanding
  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock_shared(token1);
  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock_shared(token2);
  lock.unlock_shared(token1);
  lock.unlock_shared(token2);

  // exclusive can be downgraded to upgrade, which again admits readers
  lock.lock();
  lock.unlock_and_lock_upgrade();
  EXPECT_TRUE(lock.try_lock_shared(token1));
  lock.unlock_upgrade();
  lock.unlock_shared(token1);
}
336
// Upgrade-lock conversions behave identically under both priority policies.
TEST(SharedMutex, basic_upgrade_tests) {
  runBasicUpgradeTest<SharedMutexReadPriority>();
  runBasicUpgradeTest<SharedMutexWritePriority>();
}
341
// With SharedMutexReadPriority, a writer that is blocked behind an existing
// reader must not prevent additional readers from acquiring the lock.
TEST(SharedMutex, read_has_prio) {
  SharedMutexReadPriority lock;
  SharedMutexToken token1;
  SharedMutexToken token2;
  lock.lock_shared(token1);
  bool exclusiveAcquired = false;
  auto writer = thread([&] {
    lock.lock();
    exclusiveAcquired = true;
    lock.unlock();
  });

  // lock() can't complete until we unlock token1, but it should stake
  // its claim with regards to other exclusive or upgrade locks.  We can
  // use try_lock_upgrade to poll for that eventuality.
  while (lock.try_lock_upgrade()) {
    lock.unlock_upgrade();
    this_thread::yield();
  }
  // the writer can't have the lock yet: we still hold token1
  EXPECT_FALSE(exclusiveAcquired);

  // Even though lock() is stuck we should be able to get token2
  EXPECT_TRUE(lock.try_lock_shared(token2));
  lock.unlock_shared(token1);
  lock.unlock_shared(token2);
  writer.join();
  // join() synchronizes with the writer's store to exclusiveAcquired
  EXPECT_TRUE(exclusiveAcquired);
}
370
// With SharedMutexWritePriority, a blocked writer eventually stops new
// readers from acquiring the lock (readers cannot starve the writer).
TEST(SharedMutex, write_has_prio) {
  SharedMutexWritePriority lock;
  SharedMutexToken token1;
  SharedMutexToken token2;
  lock.lock_shared(token1);
  auto writer = thread([&] {
    lock.lock();
    lock.unlock();
  });

  // eventually lock() should block readers
  while (lock.try_lock_shared(token2)) {
    lock.unlock_shared(token2);
    this_thread::yield();
  }

  // releasing the original reader lets the writer through
  lock.unlock_shared(token1);
  writer.join();
}
390
391 struct TokenLocker {
392   SharedMutexToken token;
393
394   template <typename T>
395   void lock(T* lock) {
396     lock->lock();
397   }
398
399   template <typename T>
400   void unlock(T* lock) {
401     lock->unlock();
402   }
403
404   template <typename T>
405   void lock_shared(T* lock) {
406     lock->lock_shared(token);
407   }
408
409   template <typename T>
410   void unlock_shared(T* lock) {
411     lock->unlock_shared(token);
412   }
413 };
414
// Locker adapter for any mutex exposing the plain (tokenless)
// lock/unlock/lock_shared/unlock_shared interface.
struct Locker {
  template <typename MutexT>
  void lock(MutexT* mutex) {
    mutex->lock();
  }

  template <typename MutexT>
  void unlock(MutexT* mutex) {
    mutex->unlock();
  }

  template <typename MutexT>
  void lock_shared(MutexT* mutex) {
    mutex->lock_shared();
  }

  template <typename MutexT>
  void unlock_shared(MutexT* mutex) {
    mutex->unlock_shared();
  }
};
436
// Locker adapter for lock types whose writer lock takes a spin-count
// argument and whose reader side is enter()/leave().
struct EnterLocker {
  template <typename MutexT>
  void lock(MutexT* mutex) {
    mutex->lock(0);
  }

  template <typename MutexT>
  void unlock(MutexT* mutex) {
    mutex->unlock();
  }

  template <typename MutexT>
  void lock_shared(MutexT* mutex) {
    mutex->enter(0);
  }

  template <typename MutexT>
  void unlock_shared(MutexT* mutex) {
    mutex->leave();
  }
};
458
// Minimal RAII wrapper around pthread_rwlock_t so POSIX rwlocks can be
// plugged into the lock-agnostic benchmark templates in this file.
struct PosixRWLock {
  pthread_rwlock_t lock_;

  PosixRWLock() { pthread_rwlock_init(&lock_, nullptr); }

  ~PosixRWLock() { pthread_rwlock_destroy(&lock_); }

  // Copying would duplicate the raw pthread handle and lead to a double
  // pthread_rwlock_destroy (undefined behavior), so forbid it.
  PosixRWLock(const PosixRWLock&) = delete;
  PosixRWLock& operator=(const PosixRWLock&) = delete;

  // exclusive (writer) side
  void lock() { pthread_rwlock_wrlock(&lock_); }

  void unlock() { pthread_rwlock_unlock(&lock_); }

  // shared (reader) side
  void lock_shared() { pthread_rwlock_rdlock(&lock_); }

  void unlock_shared() { pthread_rwlock_unlock(&lock_); }
};
474
// RAII wrapper around pthread_mutex_t presenting the shared-lock interface;
// "shared" acquisition is just an exclusive lock, so this serves as the
// no-reader-concurrency baseline for the benchmarks.
struct PosixMutex {
  pthread_mutex_t lock_;

  PosixMutex() { pthread_mutex_init(&lock_, nullptr); }

  ~PosixMutex() { pthread_mutex_destroy(&lock_); }

  // Copying would duplicate the raw pthread handle and lead to a double
  // pthread_mutex_destroy (undefined behavior), so forbid it.
  PosixMutex(const PosixMutex&) = delete;
  PosixMutex& operator=(const PosixMutex&) = delete;

  void lock() { pthread_mutex_lock(&lock_); }

  void unlock() { pthread_mutex_unlock(&lock_); }

  // readers get no concurrency: shared ops map onto the plain mutex
  void lock_shared() { pthread_mutex_lock(&lock_); }

  void unlock_shared() { pthread_mutex_unlock(&lock_); }
};
490
// Benchmark/stress driver for pure reader traffic: numThreads threads split
// numOps shared-lock acquire/release cycles between them.  When
// useSeparateLocks is true each thread locks its own private (uncontended)
// lock; otherwise all threads contend on globalLock.  Atom selects the
// atomic template so the same code can run under DeterministicSchedule.
template <template <typename> class Atom, typename Lock, typename Locker>
static void runContendedReaders(size_t numOps,
                                size_t numThreads,
                                bool useSeparateLocks) {
  // padding around the lock and protected value to keep them off the same
  // cache line as neighboring stack data (presumably to limit false
  // sharing effects on the measurement)
  char padding1[64];
  (void)padding1;
  Lock globalLock;
  int valueProtectedByLock = 10;
  char padding2[64];
  (void)padding2;
  Atom<bool> go(false);
  Atom<bool>* goPtr = &go; // workaround for clang bug
  vector<thread> threads(numThreads);

  // BENCHMARK_SUSPEND keeps thread creation out of the measured interval
  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < numThreads; ++t) {
      threads[t] = DSched::thread([&, t, numThreads] {
        Lock privateLock;
        Lock* lock = useSeparateLocks ? &privateLock : &globalLock;
        Locker locker;
        // spin until the main thread releases all workers at once
        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numThreads) {
          locker.lock_shared(lock);
          // note: folly::doNotOptimizeAway reads and writes to its arg,
          // so the following two lines are very different than a call
          // to folly::doNotOptimizeAway(valueProtectedByLock);
          auto copy = valueProtectedByLock;
          folly::doNotOptimizeAway(copy);
          locker.unlock_shared(lock);
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}
532
// ---------------------------------------------------------------------------
// Reader-only benchmark entry points.  Each wrapper pins runContendedReaders
// to one lock type and locker adapter.  Naming scheme: "wr_pri"/"rd_pri" =
// SharedMutex write/read priority with the token-based locker; "w_bare"/
// "r_bare" = the same locks through the tokenless Locker adapter.
// ---------------------------------------------------------------------------

static void folly_rwspin_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, RWSpinLock, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_wr_pri_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexWritePriority, TokenLocker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_w_bare_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexWritePriority, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_rd_pri_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexReadPriority, TokenLocker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_r_bare_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexReadPriority, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void folly_ticket_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, RWTicketSpinLock64, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void boost_shared_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, boost::shared_mutex, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void pthrd_rwlock_reads(uint32_t numOps,
                               size_t numThreads,
                               bool useSeparateLocks) {
  runContendedReaders<atomic, PosixRWLock, Locker>(
      numOps, numThreads, useSeparateLocks);
}
588
// Benchmark/stress driver for mixed reader/writer traffic: numThreads
// threads split numOps operations; each op is an exclusive write with
// probability ~writeFraction (writeThreshold rescales the fraction onto
// lrand48's [0, 2^31) output range), otherwise a shared read.  When
// useSeparateLocks is true each thread uses its own private lock and the
// shared counter is left untouched (no lock actually protects it then).
template <template <typename> class Atom, typename Lock, typename Locker>
static void runMixed(size_t numOps,
                     size_t numThreads,
                     double writeFraction,
                     bool useSeparateLocks) {
  // cache-line padding around the lock/value pair, as in runContendedReaders
  char padding1[64];
  (void)padding1;
  Lock globalLock;
  int valueProtectedByLock = 0;
  char padding2[64];
  (void)padding2;
  Atom<bool> go(false);
  Atom<bool>* goPtr = &go; // workaround for clang bug
  vector<thread> threads(numThreads);

  // BENCHMARK_SUSPEND keeps thread creation out of the measured interval
  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < numThreads; ++t) {
      threads[t] = DSched::thread([&, t, numThreads] {
        // per-thread reentrant PRNG, seeded by thread index for determinism
        struct drand48_data buffer;
        srand48_r(t, &buffer);
        long writeThreshold = writeFraction * 0x7fffffff;
        Lock privateLock;
        Lock* lock = useSeparateLocks ? &privateLock : &globalLock;
        Locker locker;
        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numThreads) {
          long randVal;
          lrand48_r(&buffer, &randVal);
          bool writeOp = randVal < writeThreshold;
          if (writeOp) {
            locker.lock(lock);
            // only mutate the shared value when every thread actually
            // holds the same lock
            if (!useSeparateLocks) {
              ++valueProtectedByLock;
            }
            locker.unlock(lock);
          } else {
            locker.lock_shared(lock);
            auto v = valueProtectedByLock;
            folly::doNotOptimizeAway(v);
            locker.unlock_shared(lock);
          }
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}
642
// ---------------------------------------------------------------------------
// Mixed reader/writer benchmark entry points, one per lock implementation.
// pthrd_mutex_ uses a plain mutex for both modes and so serves as the
// no-reader-concurrency baseline.
// ---------------------------------------------------------------------------

static void folly_rwspin(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, RWSpinLock, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_wr_pri(uint32_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_w_bare(uint32_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, SharedMutexWritePriority, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_rd_pri(uint32_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_r_bare(uint32_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, SharedMutexReadPriority, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void folly_ticket(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, RWTicketSpinLock64, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void boost_shared(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, boost::shared_mutex, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void pthrd_rwlock(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, PosixRWLock, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void pthrd_mutex_(size_t numOps,
                         size_t numThreads,
                         double writeFraction,
                         bool useSeparateLocks) {
  runMixed<atomic, PosixMutex, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}
714
715 template <typename Lock, template <typename> class Atom>
716 static void runAllAndValidate(size_t numOps, size_t numThreads) {
717   Lock globalLock;
718   Atom<int> globalExclusiveCount(0);
719   Atom<int> globalUpgradeCount(0);
720   Atom<int> globalSharedCount(0);
721
722   Atom<bool> go(false);
723
724   // clang crashes on access to Atom<> captured by ref in closure
725   Atom<int>* globalExclusiveCountPtr = &globalExclusiveCount;
726   Atom<int>* globalUpgradeCountPtr = &globalUpgradeCount;
727   Atom<int>* globalSharedCountPtr = &globalSharedCount;
728   Atom<bool>* goPtr = &go;
729
730   vector<thread> threads(numThreads);
731
732   BENCHMARK_SUSPEND {
733     for (size_t t = 0; t < numThreads; ++t) {
734       threads[t] = DSched::thread([&, t, numThreads] {
735         struct drand48_data buffer;
736         srand48_r(t, &buffer);
737
738         bool exclusive = false;
739         bool upgrade = false;
740         bool shared = false;
741         bool ourGlobalTokenUsed = false;
742         SharedMutexToken ourGlobalToken;
743
744         Lock privateLock;
745         vector<SharedMutexToken> privateTokens;
746
747         while (!goPtr->load()) {
748           this_thread::yield();
749         }
750         for (size_t op = t; op < numOps; op += numThreads) {
751           // randVal in [0,1000)
752           long randVal;
753           lrand48_r(&buffer, &randVal);
754           randVal = (long)((randVal * (uint64_t)1000) / 0x7fffffff);
755
756           // make as many assertions as possible about the global state
757           if (exclusive) {
758             EXPECT_EQ(1, globalExclusiveCountPtr->load(memory_order_acquire));
759             EXPECT_EQ(0, globalUpgradeCountPtr->load(memory_order_acquire));
760             EXPECT_EQ(0, globalSharedCountPtr->load(memory_order_acquire));
761           }
762           if (upgrade) {
763             EXPECT_EQ(0, globalExclusiveCountPtr->load(memory_order_acquire));
764             EXPECT_EQ(1, globalUpgradeCountPtr->load(memory_order_acquire));
765           }
766           if (shared) {
767             EXPECT_EQ(0, globalExclusiveCountPtr->load(memory_order_acquire));
768             EXPECT_TRUE(globalSharedCountPtr->load(memory_order_acquire) > 0);
769           } else {
770             EXPECT_FALSE(ourGlobalTokenUsed);
771           }
772
773           // independent 20% chance we do something to the private lock
774           if (randVal < 200) {
775             // it's okay to take multiple private shared locks because
776             // we never take an exclusive lock, so reader versus writer
777             // priority doesn't cause deadlocks
778             if (randVal < 100 && privateTokens.size() > 0) {
779               auto i = randVal % privateTokens.size();
780               privateLock.unlock_shared(privateTokens[i]);
781               privateTokens.erase(privateTokens.begin() + i);
782             } else {
783               SharedMutexToken token;
784               privateLock.lock_shared(token);
785               privateTokens.push_back(token);
786             }
787             continue;
788           }
789
790           // if we've got a lock, the only thing we can do is release it
791           // or transform it into a different kind of lock
792           if (exclusive) {
793             exclusive = false;
794             --*globalExclusiveCountPtr;
795             if (randVal < 500) {
796               globalLock.unlock();
797             } else if (randVal < 700) {
798               globalLock.unlock_and_lock_shared();
799               ++*globalSharedCountPtr;
800               shared = true;
801             } else if (randVal < 900) {
802               globalLock.unlock_and_lock_shared(ourGlobalToken);
803               ++*globalSharedCountPtr;
804               shared = true;
805               ourGlobalTokenUsed = true;
806             } else {
807               globalLock.unlock_and_lock_upgrade();
808               ++*globalUpgradeCountPtr;
809               upgrade = true;
810             }
811           } else if (upgrade) {
812             upgrade = false;
813             --*globalUpgradeCountPtr;
814             if (randVal < 500) {
815               globalLock.unlock_upgrade();
816             } else if (randVal < 700) {
817               globalLock.unlock_upgrade_and_lock_shared();
818               ++*globalSharedCountPtr;
819               shared = true;
820             } else if (randVal < 900) {
821               globalLock.unlock_upgrade_and_lock_shared(ourGlobalToken);
822               ++*globalSharedCountPtr;
823               shared = true;
824               ourGlobalTokenUsed = true;
825             } else {
826               globalLock.unlock_upgrade_and_lock();
827               ++*globalExclusiveCountPtr;
828               exclusive = true;
829             }
830           } else if (shared) {
831             shared = false;
832             --*globalSharedCountPtr;
833             if (ourGlobalTokenUsed) {
834               globalLock.unlock_shared(ourGlobalToken);
835               ourGlobalTokenUsed = false;
836             } else {
837               globalLock.unlock_shared();
838             }
839           } else if (randVal < 400) {
840             // 40% chance of shared lock with token, 5 ways to get it
841
842             // delta t goes from -1 millis to 7 millis
843             auto dt = microseconds(10 * (randVal - 100));
844
845             if (randVal < 400) {
846               globalLock.lock_shared(ourGlobalToken);
847               shared = true;
848             } else if (randVal < 500) {
849               shared = globalLock.try_lock_shared(ourGlobalToken);
850             } else if (randVal < 600) {
851               shared = globalLock.try_lock_shared_for(dt, ourGlobalToken);
852             } else if (randVal < 800) {
853               shared = globalLock.try_lock_shared_until(
854                   system_clock::now() + dt, ourGlobalToken);
855             }
856             if (shared) {
857               ourGlobalTokenUsed = true;
858               ++*globalSharedCountPtr;
859             }
860           } else if (randVal < 800) {
861             // 40% chance of shared lock without token
862             auto dt = microseconds(10 * (randVal - 100));
863             if (randVal < 400) {
864               globalLock.lock_shared();
865               shared = true;
866             } else if (randVal < 500) {
867               shared = globalLock.try_lock_shared();
868             } else if (randVal < 600) {
869               shared = globalLock.try_lock_shared_for(dt);
870             } else if (randVal < 800) {
871               shared = globalLock.try_lock_shared_until(
872                   system_clock::now() + dt);
873             }
874             if (shared) {
875               ++*globalSharedCountPtr;
876             }
877           } else if (randVal < 900) {
878             // 10% change of upgrade lock
879             globalLock.lock_upgrade();
880             upgrade = true;
881             ++*globalUpgradeCountPtr;
882           } else {
883             // 10% chance of exclusive lock, 5 ways to get it
884
885             // delta t goes from -1 millis to 9 millis
886             auto dt = microseconds(100 * (randVal - 910));
887
888             if (randVal < 400) {
889               globalLock.lock();
890               exclusive = true;
891             } else if (randVal < 500) {
892               exclusive = globalLock.try_lock();
893             } else if (randVal < 600) {
894               exclusive = globalLock.try_lock_for(dt);
895             } else if (randVal < 700) {
896               exclusive = globalLock.try_lock_until(steady_clock::now() + dt);
897             } else {
898               exclusive = globalLock.try_lock_until(system_clock::now() + dt);
899             }
900             if (exclusive) {
901               ++*globalExclusiveCountPtr;
902             }
903           }
904         }
905
906         if (exclusive) {
907           --*globalExclusiveCountPtr;
908           globalLock.unlock();
909         }
910         if (upgrade) {
911           --*globalUpgradeCountPtr;
912           globalLock.unlock_upgrade();
913         }
914         if (shared) {
915           --*globalSharedCountPtr;
916           if (ourGlobalTokenUsed) {
917             globalLock.unlock_shared(ourGlobalToken);
918             ourGlobalTokenUsed = false;
919           } else {
920             globalLock.unlock_shared();
921           }
922         }
923         for (auto& token : privateTokens) {
924           privateLock.unlock_shared(token);
925         }
926       });
927     }
928   }
929
930   go.store(true);
931   for (auto& thr : threads) {
932     DSched::join(thr);
933   }
934 }
935
// The deterministic_* tests below run the harness under
// DeterministicSchedule, which replays a reproducible pseudo-random thread
// interleaving seeded by the pass number, so any failure can be replayed.
// The non-deterministic variants use real std::atomic with more threads
// and iterations for broader (but unreproducible) coverage.

// All readers contend on a single lock (sharing disabled via `false`).
TEST(SharedMutex, deterministic_concurrent_readers_of_one_lock_read_prio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runContendedReaders<DeterministicAtomic,
                        DSharedMutexReadPriority,
                        Locker>(1000, 3, false);
  }
}

TEST(SharedMutex, deterministic_concurrent_readers_of_one_lock_write_prio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runContendedReaders<DeterministicAtomic,
                        DSharedMutexWritePriority,
                        Locker>(1000, 3, false);
  }
}

TEST(SharedMutex, concurrent_readers_of_one_lock_read_prio) {
  for (int pass = 0; pass < 10; ++pass) {
    runContendedReaders<atomic, SharedMutexReadPriority, Locker>(
        100000, 32, false);
  }
}

TEST(SharedMutex, concurrent_readers_of_one_lock_write_prio) {
  for (int pass = 0; pass < 10; ++pass) {
    runContendedReaders<atomic, SharedMutexWritePriority, Locker>(
        100000, 32, false);
  }
}

// Same scenarios, but with per-thread locks (`true`), so readers mostly
// touch their own lock instead of a single shared one.
TEST(SharedMutex, deterministic_readers_of_concurrent_locks_read_prio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runContendedReaders<DeterministicAtomic,
                        DSharedMutexReadPriority,
                        Locker>(1000, 3, true);
  }
}

TEST(SharedMutex, deterministic_readers_of_concurrent_locks_write_prio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runContendedReaders<DeterministicAtomic,
                        DSharedMutexWritePriority,
                        Locker>(1000, 3, true);
  }
}

TEST(SharedMutex, readers_of_concurrent_locks_read_prio) {
  for (int pass = 0; pass < 10; ++pass) {
    runContendedReaders<atomic, SharedMutexReadPriority, TokenLocker>(
        100000, 32, true);
  }
}

TEST(SharedMutex, readers_of_concurrent_locks_write_prio) {
  for (int pass = 0; pass < 10; ++pass) {
    runContendedReaders<atomic, SharedMutexWritePriority, TokenLocker>(
        100000, 32, true);
  }
}
999
// Mixed read/write workloads via runMixed.  The third argument is the
// write fraction: 0.1 is read-heavy, 0.9 is write-heavy, 1.0 is all
// writes.  Each scenario is exercised for both lock priority policies.
TEST(SharedMutex, deterministic_mixed_mostly_read_read_prio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runMixed<DeterministicAtomic, DSharedMutexReadPriority, Locker>(
        1000, 3, 0.1, false);
  }
}

TEST(SharedMutex, deterministic_mixed_mostly_read_write_prio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runMixed<DeterministicAtomic, DSharedMutexWritePriority, Locker>(
        1000, 3, 0.1, false);
  }
}

TEST(SharedMutex, mixed_mostly_read_read_prio) {
  for (int pass = 0; pass < 5; ++pass) {
    runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
        10000, 32, 0.1, false);
  }
}

TEST(SharedMutex, mixed_mostly_read_write_prio) {
  for (int pass = 0; pass < 5; ++pass) {
    runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
        10000, 32, 0.1, false);
  }
}

TEST(SharedMutex, deterministic_mixed_mostly_write_read_prio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runMixed<DeterministicAtomic, DSharedMutexReadPriority, TokenLocker>(
        1000, 10, 0.9, false);
  }
}

TEST(SharedMutex, deterministic_mixed_mostly_write_write_prio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runMixed<DeterministicAtomic, DSharedMutexWritePriority, TokenLocker>(
        1000, 10, 0.9, false);
  }
}

// uniformSubset focuses the deterministic scheduler on a restricted set of
// interleavings per pass, which makes rare orderings (like a lost wakeup)
// more likely to be hit than with a fully uniform schedule.
TEST(SharedMutex, deterministic_lost_wakeup_write_prio) {
  for (int pass = 0; pass < 10; ++pass) {
    DSched sched(DSched::uniformSubset(pass, 2, 200));
    runMixed<DeterministicAtomic, DSharedMutexWritePriority, TokenLocker>(
        1000, 3, 1.0, false);
  }
}

TEST(SharedMutex, mixed_mostly_write_read_prio) {
  for (int pass = 0; pass < 5; ++pass) {
    runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
        50000, 300, 0.9, false);
  }
}

TEST(SharedMutex, mixed_mostly_write_write_prio) {
  for (int pass = 0; pass < 5; ++pass) {
    runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
        50000, 300, 0.9, false);
  }
}

// runAllAndValidate exercises every lock operation (shared, exclusive,
// upgrade, timed, and transitions) and checks the lock's invariants.
TEST(SharedMutex, deterministic_all_ops_read_prio) {
  for (int pass = 0; pass < 5; ++pass) {
    DSched sched(DSched::uniform(pass));
    runAllAndValidate<DSharedMutexReadPriority, DeterministicAtomic>(1000, 8);
  }
}

TEST(SharedMutex, deterministic_all_ops_write_prio) {
  for (int pass = 0; pass < 5; ++pass) {
    DSched sched(DSched::uniform(pass));
    runAllAndValidate<DSharedMutexWritePriority, DeterministicAtomic>(1000, 8);
  }
}

TEST(SharedMutex, all_ops_read_prio) {
  for (int pass = 0; pass < 5; ++pass) {
    runAllAndValidate<SharedMutexReadPriority, atomic>(100000, 32);
  }
}

TEST(SharedMutex, all_ops_write_prio) {
  for (int pass = 0; pass < 5; ++pass) {
    runAllAndValidate<SharedMutexWritePriority, atomic>(100000, 32);
  }
}
1093
1094 FOLLY_ASSUME_FBVECTOR_COMPATIBLE(
1095     boost::optional<boost::optional<SharedMutexToken>>)
1096
// Setup is a set of threads that either grab a shared lock, or exclusive
// and then downgrade it, or upgrade then upgrade and downgrade, then
// enqueue the shared lock to a second set of threads that just performs
// unlocks.  Half of the shared locks use tokens, the others don't.
//
// Queue element encoding (doubly-optional): the outer boost::none is the
// EOF sentinel that shuts down receivers; an engaged outer optional holding
// an inner boost::none means "shared lock acquired without a token";
// an engaged inner optional carries the token to unlock with.
template <typename Lock, template <typename> class Atom>
static void runRemoteUnlock(size_t numOps,
                            double preWriteFraction,
                            double preUpgradeFraction,
                            size_t numSendingThreads,
                            size_t numReceivingThreads) {
  Lock globalLock;
  MPMCQueue<boost::optional<boost::optional<SharedMutexToken>>, Atom>
    queue(10);
  auto queuePtr = &queue; // workaround for clang crash

  Atom<bool> go(false);
  auto goPtr = &go; // workaround for clang crash
  Atom<int> pendingSenders(numSendingThreads);
  auto pendingSendersPtr = &pendingSenders; // workaround for clang crash
  vector<thread> threads(numSendingThreads + numReceivingThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < threads.size(); ++t) {
      threads[t] = DSched::thread([&, t, numSendingThreads] {
        if (t >= numSendingThreads) {
          // we're a receiver
          typename decltype(queue)::value_type elem;
          while (true) {
            queuePtr->blockingRead(elem);
            if (!elem) {
              // EOF, pass the EOF token
              // (re-enqueue so every other receiver also sees it and exits)
              queuePtr->blockingWrite(std::move(elem));
              break;
            }
            if (*elem) {
              // inner optional engaged: unlock using the sender's token
              globalLock.unlock_shared(**elem);
            } else {
              // token-less shared lock
              globalLock.unlock_shared();
            }
          }
          return;
        }
        // else we're a sender

        struct drand48_data buffer;
        srand48_r(t, &buffer);

        while (!goPtr->load()) {
          this_thread::yield();
        }
        // ops are strided across senders so each op index is handled once
        for (size_t op = t; op < numOps; op += numSendingThreads) {
          long unscaledRandVal;
          lrand48_r(&buffer, &unscaledRandVal);

          // randVal in [0,1]
          double randVal = ((double)unscaledRandVal) / 0x7fffffff;

          // extract a bit and rescale
          bool useToken = randVal >= 0.5;
          randVal = (randVal - (useToken ? 0.5 : 0.0)) * 2;

          boost::optional<SharedMutexToken> maybeToken;

          // All four branches end holding a shared lock; they differ in
          // the path taken to get there (write downgrade, upgrade
          // downgrade, upgrade->write->shared, or direct shared).
          if (useToken) {
            SharedMutexToken token;
            if (randVal < preWriteFraction) {
              globalLock.lock();
              globalLock.unlock_and_lock_shared(token);
            } else if (randVal < preWriteFraction + preUpgradeFraction / 2) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock_shared(token);
            } else if (randVal < preWriteFraction + preUpgradeFraction) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock();
              globalLock.unlock_and_lock_shared(token);
            } else {
              globalLock.lock_shared(token);
            }
            maybeToken = token;
          } else {
            if (randVal < preWriteFraction) {
              globalLock.lock();
              globalLock.unlock_and_lock_shared();
            } else if (randVal < preWriteFraction + preUpgradeFraction / 2) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock_shared();
            } else if (randVal < preWriteFraction + preUpgradeFraction) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock();
              globalLock.unlock_and_lock_shared();
            } else {
              globalLock.lock_shared();
            }
          }

          // blockingWrite is emplace-like, so this automatically adds
          // another level of wrapping
          queuePtr->blockingWrite(maybeToken);
        }
        // last sender to finish enqueues the single EOF sentinel
        if (--*pendingSendersPtr == 0) {
          queuePtr->blockingWrite(boost::none);
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}
1208
// Remote-unlock scenarios: locks acquired on one thread are released on
// another (args: numOps, preWriteFraction, preUpgradeFraction, senders,
// receivers).
TEST(SharedMutex, deterministic_remote_write_prio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runRemoteUnlock<DSharedMutexWritePriority, DeterministicAtomic>(
        500, 0.1, 0.1, 5, 5);
  }
}

TEST(SharedMutex, deterministic_remote_read_prio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runRemoteUnlock<DSharedMutexReadPriority, DeterministicAtomic>(
        500, 0.1, 0.1, 5, 5);
  }
}

TEST(SharedMutex, remote_write_prio) {
  for (int pass = 0; pass < 10; ++pass) {
    runRemoteUnlock<SharedMutexWritePriority, atomic>(100000, 0.1, 0.1, 5, 5);
  }
}

// NOTE(review): 100 passes here vs 10 for write_prio — presumably the
// read-priority remote-unlock path needs more repetitions to hit rare
// interleavings; confirm against the original commit intent.
TEST(SharedMutex, remote_read_prio) {
  for (int pass = 0; pass < 100; ++pass) {
    runRemoteUnlock<SharedMutexReadPriority, atomic>(100000, 0.1, 0.1, 5, 5);
  }
}
1236
1237 static void burn(size_t n) {
1238   for (size_t i = 0; i < n; ++i) {
1239     folly::doNotOptimizeAway(i);
1240   }
1241 }
1242
// Two threads and three locks, arranged so that they have to proceed
// in turn with reader/writer conflict
template <typename Lock, template <typename> class Atom = atomic>
static void runPingPong(size_t numRounds, size_t burnCount) {
  // The char[56] paddings (before, between via the pair, and after)
  // presumably keep each lock on its own cache line to avoid false
  // sharing between the two threads -- TODO confirm line size assumption.
  char padding1[56];
  (void)padding1;
  pair<Lock, char[56]> locks[3];
  char padding2[56];
  (void)padding2;

  Atom<int> avail(0);
  auto availPtr = &avail; // workaround for clang crash
  Atom<bool> go(false);
  auto goPtr = &go; // workaround for clang crash
  vector<thread> threads(2);

  // Initial state: locks 0 and 1 held exclusively, lock 2 held shared.
  // Each round the writer releases one and takes the next, forcing the
  // reader and writer to alternate around the ring of three locks.
  locks[0].first.lock();
  locks[1].first.lock();
  locks[2].first.lock_shared();

  BENCHMARK_SUSPEND {
    // writer thread: releases lock i, then acquires lock (i+2) exclusively
    threads[0] = DSched::thread([&] {
      ++*availPtr;
      while (!goPtr->load()) {
        this_thread::yield();
      }
      for (size_t i = 0; i < numRounds; ++i) {
        locks[i % 3].first.unlock();
        locks[(i + 2) % 3].first.lock();
        burn(burnCount);
      }
    });
    // reader thread: acquires lock i shared, releases lock (i+2) shared
    threads[1] = DSched::thread([&] {
      ++*availPtr;
      while (!goPtr->load()) {
        this_thread::yield();
      }
      for (size_t i = 0; i < numRounds; ++i) {
        locks[i % 3].first.lock_shared();
        burn(burnCount);
        locks[(i + 2) % 3].first.unlock_shared();
      }
    });

    // wait until both threads are spawned and spinning before starting
    while (avail.load() < 2) {
      this_thread::yield();
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
  // release the locks that remain held after the final round, matching
  // the initial-state pattern rotated by numRounds
  locks[numRounds % 3].first.unlock();
  locks[(numRounds + 1) % 3].first.unlock();
  locks[(numRounds + 2) % 3].first.unlock_shared();
}
1300
// Benchmark drivers: each instantiates the ping-pong scenario with a
// particular reader-writer lock implementation.  The iteration count is
// divided by scale so that variants with different per-round cost can be
// registered with comparable wall-clock budgets.
static void folly_rwspin_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<RWSpinLock>(n / scale, burnCount);
}

static void shmtx_w_bare_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<SharedMutexWritePriority>(n / scale, burnCount);
}

static void shmtx_r_bare_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<SharedMutexReadPriority>(n / scale, burnCount);
}

static void folly_ticket_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<RWTicketSpinLock64>(n / scale, burnCount);
}

static void boost_shared_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<boost::shared_mutex>(n / scale, burnCount);
}

static void pthrd_rwlock_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<PosixRWLock>(n / scale, burnCount);
}
1324
// Ping-pong correctness tests (burnCount 0: pure lock handoff, no
// artificial work between acquisitions).
TEST(SharedMutex, deterministic_ping_pong_write_prio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runPingPong<DSharedMutexWritePriority, DeterministicAtomic>(500, 0);
  }
}

TEST(SharedMutex, deterministic_ping_pong_read_prio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runPingPong<DSharedMutexReadPriority, DeterministicAtomic>(500, 0);
  }
}

TEST(SharedMutex, ping_pong_write_prio) {
  for (int pass = 0; pass < 1; ++pass) {
    runPingPong<SharedMutexWritePriority, atomic>(50000, 0);
  }
}

TEST(SharedMutex, ping_pong_read_prio) {
  for (int pass = 0; pass < 1; ++pass) {
    runPingPong<SharedMutexReadPriority, atomic>(50000, 0);
  }
}
1350
1351 // This is here so you can tell how much of the runtime reported by the
1352 // more complex harnesses is due to the harness, although due to the
1353 // magic of compiler optimization it may also be slower
1354 BENCHMARK(single_thread_lock_shared_unlock_shared, iters) {
1355   SharedMutex lock;
1356   for (size_t n = 0; n < iters; ++n) {
1357     SharedMutex::Token token;
1358     lock.lock_shared(token);
1359     folly::doNotOptimizeAway(0);
1360     lock.unlock_shared(token);
1361   }
1362 }
1363
1364 BENCHMARK(single_thread_lock_unlock, iters) {
1365   SharedMutex lock;
1366   for (size_t n = 0; n < iters; ++n) {
1367     lock.lock();
1368     folly::doNotOptimizeAway(0);
1369     lock.unlock();
1370   }
1371 }
1372
// Shorthand for registering parameterized benchmarks; FB_VA_GLUE forwards
// the argument pack into the folly NAMED_PARAM macros.  BENCH_REL reports
// results relative to the preceding BENCH_BASE entry.
#define BENCH_BASE(...) FB_VA_GLUE(BENCHMARK_NAMED_PARAM, (__VA_ARGS__))
#define BENCH_REL(...) FB_VA_GLUE(BENCHMARK_RELATIVE_NAMED_PARAM, (__VA_ARGS__))
1375
1376 // 100% reads.  Best-case scenario for deferred locks.  Lock is colocated
1377 // with read data, so inline lock takes cache miss every time but deferred
1378 // lock has only cache hits and local access.
1379 BENCHMARK_DRAW_LINE()
1380 BENCHMARK_DRAW_LINE()
1381 BENCH_BASE(folly_rwspin_reads, 1thread, 1, false)
1382 BENCH_REL (shmtx_wr_pri_reads, 1thread, 1, false)
1383 BENCH_REL (shmtx_w_bare_reads, 1thread, 1, false)
1384 BENCH_REL (shmtx_rd_pri_reads, 1thread, 1, false)
1385 BENCH_REL (shmtx_r_bare_reads, 1thread, 1, false)
1386 BENCH_REL (folly_ticket_reads, 1thread, 1, false)
1387 BENCH_REL (boost_shared_reads, 1thread, 1, false)
1388 BENCH_REL (pthrd_rwlock_reads, 1thread, 1, false)
1389 BENCHMARK_DRAW_LINE()
1390 BENCH_BASE(folly_rwspin_reads, 2thread, 2, false)
1391 BENCH_REL (shmtx_wr_pri_reads, 2thread, 2, false)
1392 BENCH_REL (shmtx_w_bare_reads, 2thread, 2, false)
1393 BENCH_REL (shmtx_rd_pri_reads, 2thread, 2, false)
1394 BENCH_REL (shmtx_r_bare_reads, 2thread, 2, false)
1395 BENCH_REL (folly_ticket_reads, 2thread, 2, false)
1396 BENCH_REL (boost_shared_reads, 2thread, 2, false)
1397 BENCH_REL (pthrd_rwlock_reads, 2thread, 2, false)
1398 BENCHMARK_DRAW_LINE()
1399 BENCH_BASE(folly_rwspin_reads, 4thread, 4, false)
1400 BENCH_REL (shmtx_wr_pri_reads, 4thread, 4, false)
1401 BENCH_REL (shmtx_w_bare_reads, 4thread, 4, false)
1402 BENCH_REL (shmtx_rd_pri_reads, 4thread, 4, false)
1403 BENCH_REL (shmtx_r_bare_reads, 4thread, 4, false)
1404 BENCH_REL (folly_ticket_reads, 4thread, 4, false)
1405 BENCH_REL (boost_shared_reads, 4thread, 4, false)
1406 BENCH_REL (pthrd_rwlock_reads, 4thread, 4, false)
1407 BENCHMARK_DRAW_LINE()
1408 BENCH_BASE(folly_rwspin_reads, 8thread, 8, false)
1409 BENCH_REL (shmtx_wr_pri_reads, 8thread, 8, false)
1410 BENCH_REL (shmtx_w_bare_reads, 8thread, 8, false)
1411 BENCH_REL (shmtx_rd_pri_reads, 8thread, 8, false)
1412 BENCH_REL (shmtx_r_bare_reads, 8thread, 8, false)
1413 BENCH_REL (folly_ticket_reads, 8thread, 8, false)
1414 BENCH_REL (boost_shared_reads, 8thread, 8, false)
1415 BENCH_REL (pthrd_rwlock_reads, 8thread, 8, false)
1416 BENCHMARK_DRAW_LINE()
1417 BENCH_BASE(folly_rwspin_reads, 16thread, 16, false)
1418 BENCH_REL (shmtx_wr_pri_reads, 16thread, 16, false)
1419 BENCH_REL (shmtx_w_bare_reads, 16thread, 16, false)
1420 BENCH_REL (shmtx_rd_pri_reads, 16thread, 16, false)
1421 BENCH_REL (shmtx_r_bare_reads, 16thread, 16, false)
1422 BENCH_REL (folly_ticket_reads, 16thread, 16, false)
1423 BENCH_REL (boost_shared_reads, 16thread, 16, false)
1424 BENCH_REL (pthrd_rwlock_reads, 16thread, 16, false)
1425 BENCHMARK_DRAW_LINE()
1426 BENCH_BASE(folly_rwspin_reads, 32thread, 32, false)
1427 BENCH_REL (shmtx_wr_pri_reads, 32thread, 32, false)
1428 BENCH_REL (shmtx_w_bare_reads, 32thread, 32, false)
1429 BENCH_REL (shmtx_rd_pri_reads, 32thread, 32, false)
1430 BENCH_REL (shmtx_r_bare_reads, 32thread, 32, false)
1431 BENCH_REL (folly_ticket_reads, 32thread, 32, false)
1432 BENCH_REL (boost_shared_reads, 32thread, 32, false)
1433 BENCH_REL (pthrd_rwlock_reads, 32thread, 32, false)
1434 BENCHMARK_DRAW_LINE()
1435 BENCH_BASE(folly_rwspin_reads, 64thread, 64, false)
1436 BENCH_REL (shmtx_wr_pri_reads, 64thread, 64, false)
1437 BENCH_REL (shmtx_w_bare_reads, 64thread, 64, false)
1438 BENCH_REL (shmtx_rd_pri_reads, 64thread, 64, false)
1439 BENCH_REL (shmtx_r_bare_reads, 64thread, 64, false)
1440 BENCH_REL (folly_ticket_reads, 64thread, 64, false)
1441 BENCH_REL (boost_shared_reads, 64thread, 64, false)
1442 BENCH_REL (pthrd_rwlock_reads, 64thread, 64, false)
1443
1444 // 1 lock used by everybody, 100% writes.  Threads only hurt, but it is
1445 // good to not fail catastrophically.  Compare to single_thread_lock_unlock
1446 // to see the overhead of the generic driver (and its pseudo-random number
1447 // generator).  pthrd_mutex_ is a pthread_mutex_t (default, not adaptive),
1448 // which is better than any of the reader-writer locks for this scenario.
1449 BENCHMARK_DRAW_LINE()
1450 BENCHMARK_DRAW_LINE()
1451 BENCH_BASE(folly_rwspin, 1thread_all_write, 1, 1.0, false)
1452 BENCH_REL (shmtx_wr_pri, 1thread_all_write, 1, 1.0, false)
1453 BENCH_REL (shmtx_rd_pri, 1thread_all_write, 1, 1.0, false)
1454 BENCH_REL (folly_ticket, 1thread_all_write, 1, 1.0, false)
1455 BENCH_REL (boost_shared, 1thread_all_write, 1, 1.0, false)
1456 BENCH_REL (pthrd_rwlock, 1thread_all_write, 1, 1.0, false)
1457 BENCH_REL (pthrd_mutex_, 1thread_all_write, 1, 1.0, false)
1458 BENCHMARK_DRAW_LINE()
1459 BENCH_BASE(folly_rwspin, 2thread_all_write, 2, 1.0, false)
1460 BENCH_REL (shmtx_wr_pri, 2thread_all_write, 2, 1.0, false)
1461 BENCH_REL (shmtx_rd_pri, 2thread_all_write, 2, 1.0, false)
1462 BENCH_REL (folly_ticket, 2thread_all_write, 2, 1.0, false)
1463 BENCH_REL (boost_shared, 2thread_all_write, 2, 1.0, false)
1464 BENCH_REL (pthrd_rwlock, 2thread_all_write, 2, 1.0, false)
1465 BENCH_REL (pthrd_mutex_, 2thread_all_write, 2, 1.0, false)
1466 BENCHMARK_DRAW_LINE()
1467 BENCH_BASE(folly_rwspin, 4thread_all_write, 4, 1.0, false)
1468 BENCH_REL (shmtx_wr_pri, 4thread_all_write, 4, 1.0, false)
1469 BENCH_REL (shmtx_rd_pri, 4thread_all_write, 4, 1.0, false)
1470 BENCH_REL (folly_ticket, 4thread_all_write, 4, 1.0, false)
1471 BENCH_REL (boost_shared, 4thread_all_write, 4, 1.0, false)
1472 BENCH_REL (pthrd_rwlock, 4thread_all_write, 4, 1.0, false)
1473 BENCH_REL (pthrd_mutex_, 4thread_all_write, 4, 1.0, false)
1474 BENCHMARK_DRAW_LINE()
1475 BENCH_BASE(folly_rwspin, 8thread_all_write, 8, 1.0, false)
1476 BENCH_REL (shmtx_wr_pri, 8thread_all_write, 8, 1.0, false)
1477 BENCH_REL (shmtx_rd_pri, 8thread_all_write, 8, 1.0, false)
1478 BENCH_REL (folly_ticket, 8thread_all_write, 8, 1.0, false)
1479 BENCH_REL (boost_shared, 8thread_all_write, 8, 1.0, false)
1480 BENCH_REL (pthrd_rwlock, 8thread_all_write, 8, 1.0, false)
1481 BENCH_REL (pthrd_mutex_, 8thread_all_write, 8, 1.0, false)
1482 BENCHMARK_DRAW_LINE()
1483 BENCH_BASE(folly_rwspin, 16thread_all_write, 16, 1.0, false)
1484 BENCH_REL (shmtx_wr_pri, 16thread_all_write, 16, 1.0, false)
1485 BENCH_REL (shmtx_rd_pri, 16thread_all_write, 16, 1.0, false)
1486 BENCH_REL (folly_ticket, 16thread_all_write, 16, 1.0, false)
1487 BENCH_REL (boost_shared, 16thread_all_write, 16, 1.0, false)
1488 BENCH_REL (pthrd_rwlock, 16thread_all_write, 16, 1.0, false)
1489 BENCH_REL (pthrd_mutex_, 16thread_all_write, 16, 1.0, false)
1490 BENCHMARK_DRAW_LINE()
1491 BENCH_BASE(folly_rwspin, 32thread_all_write, 32, 1.0, false)
1492 BENCH_REL (shmtx_wr_pri, 32thread_all_write, 32, 1.0, false)
1493 BENCH_REL (shmtx_rd_pri, 32thread_all_write, 32, 1.0, false)
1494 BENCH_REL (folly_ticket, 32thread_all_write, 32, 1.0, false)
1495 BENCH_REL (boost_shared, 32thread_all_write, 32, 1.0, false)
1496 BENCH_REL (pthrd_rwlock, 32thread_all_write, 32, 1.0, false)
1497 BENCH_REL (pthrd_mutex_, 32thread_all_write, 32, 1.0, false)
1498 BENCHMARK_DRAW_LINE()
1499 BENCH_BASE(folly_rwspin, 64thread_all_write, 64, 1.0, false)
1500 BENCH_REL (shmtx_wr_pri, 64thread_all_write, 64, 1.0, false)
1501 BENCH_REL (shmtx_rd_pri, 64thread_all_write, 64, 1.0, false)
1502 BENCH_REL (folly_ticket, 64thread_all_write, 64, 1.0, false)
1503 BENCH_REL (boost_shared, 64thread_all_write, 64, 1.0, false)
1504 BENCH_REL (pthrd_rwlock, 64thread_all_write, 64, 1.0, false)
1505 BENCH_REL (pthrd_mutex_, 64thread_all_write, 64, 1.0, false)
1506
1507 // 1 lock used by everybody, 10% writes.  Not much scaling to be had.  Perf
1508 // is best at 1 thread, once you've got multiple threads > 8 threads hurts.
1509 BENCHMARK_DRAW_LINE()
1510 BENCHMARK_DRAW_LINE()
1511 BENCH_BASE(folly_rwspin, 1thread_10pct_write, 1, 0.10, false)
1512 BENCH_REL (shmtx_wr_pri, 1thread_10pct_write, 1, 0.10, false)
1513 BENCH_REL (shmtx_rd_pri, 1thread_10pct_write, 1, 0.10, false)
1514 BENCH_REL (folly_ticket, 1thread_10pct_write, 1, 0.10, false)
1515 BENCH_REL (boost_shared, 1thread_10pct_write, 1, 0.10, false)
1516 BENCH_REL (pthrd_rwlock, 1thread_10pct_write, 1, 0.10, false)
1517 BENCHMARK_DRAW_LINE()
1518 BENCH_BASE(folly_rwspin, 2thread_10pct_write, 2, 0.10, false)
1519 BENCH_REL (shmtx_wr_pri, 2thread_10pct_write, 2, 0.10, false)
1520 BENCH_REL (shmtx_rd_pri, 2thread_10pct_write, 2, 0.10, false)
1521 BENCH_REL (folly_ticket, 2thread_10pct_write, 2, 0.10, false)
1522 BENCH_REL (boost_shared, 2thread_10pct_write, 2, 0.10, false)
1523 BENCH_REL (pthrd_rwlock, 2thread_10pct_write, 2, 0.10, false)
1524 BENCHMARK_DRAW_LINE()
1525 BENCH_BASE(folly_rwspin, 4thread_10pct_write, 4, 0.10, false)
1526 BENCH_REL (shmtx_wr_pri, 4thread_10pct_write, 4, 0.10, false)
1527 BENCH_REL (shmtx_rd_pri, 4thread_10pct_write, 4, 0.10, false)
1528 BENCH_REL (folly_ticket, 4thread_10pct_write, 4, 0.10, false)
1529 BENCH_REL (boost_shared, 4thread_10pct_write, 4, 0.10, false)
1530 BENCH_REL (pthrd_rwlock, 4thread_10pct_write, 4, 0.10, false)
1531 BENCHMARK_DRAW_LINE()
1532 BENCH_BASE(folly_rwspin, 8thread_10pct_write, 8, 0.10, false)
1533 BENCH_REL (shmtx_wr_pri, 8thread_10pct_write, 8, 0.10, false)
1534 BENCH_REL (shmtx_rd_pri, 8thread_10pct_write, 8, 0.10, false)
1535 BENCH_REL (folly_ticket, 8thread_10pct_write, 8, 0.10, false)
1536 BENCH_REL (boost_shared, 8thread_10pct_write, 8, 0.10, false)
1537 BENCH_REL (pthrd_rwlock, 8thread_10pct_write, 8, 0.10, false)
1538 BENCHMARK_DRAW_LINE()
1539 BENCH_BASE(folly_rwspin, 16thread_10pct_write, 16, 0.10, false)
1540 BENCH_REL (shmtx_wr_pri, 16thread_10pct_write, 16, 0.10, false)
1541 BENCH_REL (shmtx_rd_pri, 16thread_10pct_write, 16, 0.10, false)
1542 BENCH_REL (folly_ticket, 16thread_10pct_write, 16, 0.10, false)
1543 BENCH_REL (boost_shared, 16thread_10pct_write, 16, 0.10, false)
1544 BENCH_REL (pthrd_rwlock, 16thread_10pct_write, 16, 0.10, false)
1545 BENCHMARK_DRAW_LINE()
1546 BENCH_BASE(folly_rwspin, 32thread_10pct_write, 32, 0.10, false)
1547 BENCH_REL (shmtx_wr_pri, 32thread_10pct_write, 32, 0.10, false)
1548 BENCH_REL (shmtx_rd_pri, 32thread_10pct_write, 32, 0.10, false)
1549 BENCH_REL (folly_ticket, 32thread_10pct_write, 32, 0.10, false)
1550 BENCH_REL (boost_shared, 32thread_10pct_write, 32, 0.10, false)
1551 BENCH_REL (pthrd_rwlock, 32thread_10pct_write, 32, 0.10, false)
1552 BENCHMARK_DRAW_LINE()
1553 BENCH_BASE(folly_rwspin, 64thread_10pct_write, 64, 0.10, false)
1554 BENCH_REL (shmtx_wr_pri, 64thread_10pct_write, 64, 0.10, false)
1555 BENCH_REL (shmtx_rd_pri, 64thread_10pct_write, 64, 0.10, false)
1556 BENCH_REL (folly_ticket, 64thread_10pct_write, 64, 0.10, false)
1557 BENCH_REL (boost_shared, 64thread_10pct_write, 64, 0.10, false)
1558 BENCH_REL (pthrd_rwlock, 64thread_10pct_write, 64, 0.10, false)
1559
1560 // 1 lock used by everybody, 1% writes.  This is a more realistic example
1561 // than the concurrent_*_reads benchmark, but still shows SharedMutex locks
1562 // winning over all of the others
1563 BENCHMARK_DRAW_LINE()
1564 BENCHMARK_DRAW_LINE()
1565 BENCH_BASE(folly_rwspin, 1thread_1pct_write, 1, 0.01, false)
1566 BENCH_REL (shmtx_wr_pri, 1thread_1pct_write, 1, 0.01, false)
1567 BENCH_REL (shmtx_w_bare, 1thread_1pct_write, 1, 0.01, false)
1568 BENCH_REL (shmtx_rd_pri, 1thread_1pct_write, 1, 0.01, false)
1569 BENCH_REL (shmtx_r_bare, 1thread_1pct_write, 1, 0.01, false)
1570 BENCH_REL (folly_ticket, 1thread_1pct_write, 1, 0.01, false)
1571 BENCH_REL (boost_shared, 1thread_1pct_write, 1, 0.01, false)
1572 BENCH_REL (pthrd_rwlock, 1thread_1pct_write, 1, 0.01, false)
1573 BENCHMARK_DRAW_LINE()
1574 BENCH_BASE(folly_rwspin, 2thread_1pct_write, 2, 0.01, false)
1575 BENCH_REL (shmtx_wr_pri, 2thread_1pct_write, 2, 0.01, false)
1576 BENCH_REL (shmtx_w_bare, 2thread_1pct_write, 2, 0.01, false)
1577 BENCH_REL (shmtx_rd_pri, 2thread_1pct_write, 2, 0.01, false)
1578 BENCH_REL (shmtx_r_bare, 2thread_1pct_write, 2, 0.01, false)
1579 BENCH_REL (folly_ticket, 2thread_1pct_write, 2, 0.01, false)
1580 BENCH_REL (boost_shared, 2thread_1pct_write, 2, 0.01, false)
1581 BENCH_REL (pthrd_rwlock, 2thread_1pct_write, 2, 0.01, false)
1582 BENCHMARK_DRAW_LINE()
1583 BENCH_BASE(folly_rwspin, 4thread_1pct_write, 4, 0.01, false)
1584 BENCH_REL (shmtx_wr_pri, 4thread_1pct_write, 4, 0.01, false)
1585 BENCH_REL (shmtx_w_bare, 4thread_1pct_write, 4, 0.01, false)
1586 BENCH_REL (shmtx_rd_pri, 4thread_1pct_write, 4, 0.01, false)
1587 BENCH_REL (shmtx_r_bare, 4thread_1pct_write, 4, 0.01, false)
1588 BENCH_REL (folly_ticket, 4thread_1pct_write, 4, 0.01, false)
1589 BENCH_REL (boost_shared, 4thread_1pct_write, 4, 0.01, false)
1590 BENCH_REL (pthrd_rwlock, 4thread_1pct_write, 4, 0.01, false)
1591 BENCHMARK_DRAW_LINE()
1592 BENCH_BASE(folly_rwspin, 8thread_1pct_write, 8, 0.01, false)
1593 BENCH_REL (shmtx_wr_pri, 8thread_1pct_write, 8, 0.01, false)
1594 BENCH_REL (shmtx_w_bare, 8thread_1pct_write, 8, 0.01, false)
1595 BENCH_REL (shmtx_rd_pri, 8thread_1pct_write, 8, 0.01, false)
1596 BENCH_REL (shmtx_r_bare, 8thread_1pct_write, 8, 0.01, false)
1597 BENCH_REL (folly_ticket, 8thread_1pct_write, 8, 0.01, false)
1598 BENCH_REL (boost_shared, 8thread_1pct_write, 8, 0.01, false)
1599 BENCH_REL (pthrd_rwlock, 8thread_1pct_write, 8, 0.01, false)
1600 BENCHMARK_DRAW_LINE()
1601 BENCH_BASE(folly_rwspin, 16thread_1pct_write, 16, 0.01, false)
1602 BENCH_REL (shmtx_wr_pri, 16thread_1pct_write, 16, 0.01, false)
1603 BENCH_REL (shmtx_w_bare, 16thread_1pct_write, 16, 0.01, false)
1604 BENCH_REL (shmtx_rd_pri, 16thread_1pct_write, 16, 0.01, false)
1605 BENCH_REL (shmtx_r_bare, 16thread_1pct_write, 16, 0.01, false)
1606 BENCH_REL (folly_ticket, 16thread_1pct_write, 16, 0.01, false)
1607 BENCH_REL (boost_shared, 16thread_1pct_write, 16, 0.01, false)
1608 BENCH_REL (pthrd_rwlock, 16thread_1pct_write, 16, 0.01, false)
1609 BENCHMARK_DRAW_LINE()
1610 BENCH_BASE(folly_rwspin, 32thread_1pct_write, 32, 0.01, false)
1611 BENCH_REL (shmtx_wr_pri, 32thread_1pct_write, 32, 0.01, false)
1612 BENCH_REL (shmtx_w_bare, 32thread_1pct_write, 32, 0.01, false)
1613 BENCH_REL (shmtx_rd_pri, 32thread_1pct_write, 32, 0.01, false)
1614 BENCH_REL (shmtx_r_bare, 32thread_1pct_write, 32, 0.01, false)
1615 BENCH_REL (folly_ticket, 32thread_1pct_write, 32, 0.01, false)
1616 BENCH_REL (boost_shared, 32thread_1pct_write, 32, 0.01, false)
1617 BENCH_REL (pthrd_rwlock, 32thread_1pct_write, 32, 0.01, false)
1618 BENCHMARK_DRAW_LINE()
1619 BENCH_BASE(folly_rwspin, 64thread_1pct_write, 64, 0.01, false)
1620 BENCH_REL (shmtx_wr_pri, 64thread_1pct_write, 64, 0.01, false)
1621 BENCH_REL (shmtx_w_bare, 64thread_1pct_write, 64, 0.01, false)
1622 BENCH_REL (shmtx_rd_pri, 64thread_1pct_write, 64, 0.01, false)
1623 BENCH_REL (shmtx_r_bare, 64thread_1pct_write, 64, 0.01, false)
1624 BENCH_REL (folly_ticket, 64thread_1pct_write, 64, 0.01, false)
1625 BENCH_REL (boost_shared, 64thread_1pct_write, 64, 0.01, false)
1626 BENCH_REL (pthrd_rwlock, 64thread_1pct_write, 64, 0.01, false)
1627
// Worst case scenario for deferred locks. No actual sharing, likely that
// read operations will have to first set the kDeferredReadersPossibleBit,
// and likely that writers will have to scan deferredReaders[].
//
// These runs pass true as the final macro argument (one lock per thread,
// judging by the Nthr_Nlock names — confirm against the BENCH_BASE/BENCH_REL
// definitions earlier in this file). Three groups follow, one per write
// fraction: 50%, 10%, and 1%, each sweeping 2..64 threads.
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_REL (shmtx_wr_pri, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_REL (shmtx_rd_pri, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_REL (shmtx_wr_pri, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_REL (shmtx_rd_pri, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_BASE(folly_rwspin, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_REL (shmtx_wr_pri, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_REL (shmtx_rd_pri, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_BASE(folly_rwspin, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_REL (shmtx_wr_pri, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_REL (shmtx_rd_pri, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_BASE(folly_rwspin, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_REL (shmtx_wr_pri, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_REL (shmtx_rd_pri, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_BASE(folly_rwspin, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCH_REL (shmtx_wr_pri, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCH_REL (shmtx_rd_pri, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_REL (shmtx_wr_pri, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_REL (shmtx_rd_pri, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_REL (shmtx_wr_pri, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_REL (shmtx_rd_pri, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_BASE(folly_rwspin, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_REL (shmtx_wr_pri, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_REL (shmtx_rd_pri, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_BASE(folly_rwspin, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_REL (shmtx_wr_pri, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_REL (shmtx_rd_pri, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_BASE(folly_rwspin, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_REL (shmtx_wr_pri, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_REL (shmtx_rd_pri, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_BASE(folly_rwspin, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCH_REL (shmtx_wr_pri, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCH_REL (shmtx_rd_pri, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_REL (shmtx_wr_pri, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_REL (shmtx_rd_pri, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_1pct_write, 4, 0.01, true)
BENCH_REL (shmtx_wr_pri, 4thr_4lock_1pct_write, 4, 0.01, true)
BENCH_REL (shmtx_rd_pri, 4thr_4lock_1pct_write, 4, 0.01, true)
BENCH_BASE(folly_rwspin, 8thr_8lock_1pct_write, 8, 0.01, true)
BENCH_REL (shmtx_wr_pri, 8thr_8lock_1pct_write, 8, 0.01, true)
BENCH_REL (shmtx_rd_pri, 8thr_8lock_1pct_write, 8, 0.01, true)
BENCH_BASE(folly_rwspin, 16thr_16lock_1pct_write, 16, 0.01, true)
BENCH_REL (shmtx_wr_pri, 16thr_16lock_1pct_write, 16, 0.01, true)
BENCH_REL (shmtx_rd_pri, 16thr_16lock_1pct_write, 16, 0.01, true)
BENCH_BASE(folly_rwspin, 32thr_32lock_1pct_write, 32, 0.01, true)
BENCH_REL (shmtx_wr_pri, 32thr_32lock_1pct_write, 32, 0.01, true)
BENCH_REL (shmtx_rd_pri, 32thr_32lock_1pct_write, 32, 0.01, true)
BENCH_BASE(folly_rwspin, 64thr_64lock_1pct_write, 64, 0.01, true)
BENCH_REL (shmtx_wr_pri, 64thr_64lock_1pct_write, 64, 0.01, true)
BENCH_REL (shmtx_rd_pri, 64thr_64lock_1pct_write, 64, 0.01, true)
1688
// Ping-pong tests have a scaled number of iterations, because their burn
// loop would make them too slow otherwise.  Ping-pong with burn count of
// 100k or 300k shows the advantage of soft-spin, reducing the cost of
// each wakeup by about 20 usec.  (Take benchmark reported difference,
// ~400 nanos, multiply by the scale of 100, then divide by 2 because
// each round has two wakeups.)
//
// The two numeric macro args appear to be (scale, burnCount) per the text
// above — confirm against the BENCH_BASE/BENCH_REL definitions earlier in
// this file.
BENCHMARK_DRAW_LINE()
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_ping_pong, burn0, 1, 0)
BENCH_REL (shmtx_w_bare_ping_pong, burn0, 1, 0)
BENCH_REL (shmtx_r_bare_ping_pong, burn0, 1, 0)
BENCH_REL (folly_ticket_ping_pong, burn0, 1, 0)
BENCH_REL (boost_shared_ping_pong, burn0, 1, 0)
BENCH_REL (pthrd_rwlock_ping_pong, burn0, 1, 0)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_ping_pong, burn100k, 100, 100000)
BENCH_REL (shmtx_w_bare_ping_pong, burn100k, 100, 100000)
BENCH_REL (shmtx_r_bare_ping_pong, burn100k, 100, 100000)
BENCH_REL (folly_ticket_ping_pong, burn100k, 100, 100000)
BENCH_REL (boost_shared_ping_pong, burn100k, 100, 100000)
BENCH_REL (pthrd_rwlock_ping_pong, burn100k, 100, 100000)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_ping_pong, burn300k, 100, 300000)
BENCH_REL (shmtx_w_bare_ping_pong, burn300k, 100, 300000)
BENCH_REL (shmtx_r_bare_ping_pong, burn300k, 100, 300000)
BENCH_REL (folly_ticket_ping_pong, burn300k, 100, 300000)
BENCH_REL (boost_shared_ping_pong, burn300k, 100, 300000)
BENCH_REL (pthrd_rwlock_ping_pong, burn300k, 100, 300000)
BENCHMARK_DRAW_LINE()
BENCH_BASE(folly_rwspin_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (shmtx_w_bare_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (shmtx_r_bare_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (folly_ticket_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (boost_shared_ping_pong, burn1M, 1000, 1000000)
BENCH_REL (pthrd_rwlock_ping_pong, burn1M, 1000, 1000000)
1724
// Reproduce with a 10 minute run and
1726 //   sudo nice -n -20
1727 //     shared_mutex_test --benchmark --bm_min_iters=1000000
1728 //
// Comparisons use folly::RWSpinLock as the baseline, with the
1730 // following row being the default SharedMutex (using *Holder or
1731 // Token-ful methods).
1732 //
1733 // Following results on 2-socket Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
1734 //
1735 // ============================================================================
1736 // folly/test/SharedMutexTest.cpp                  relative  time/iter  iters/s
1737 // ============================================================================
1738 // single_thread_lock_shared_unlock_shared                     25.17ns   39.74M
1739 // single_thread_lock_unlock                                   25.88ns   38.64M
1740 // ----------------------------------------------------------------------------
1741 // ----------------------------------------------------------------------------
1742 // folly_rwspin_reads(1thread)                                 15.16ns   65.95M
1743 // shmtx_wr_pri_reads(1thread)                       69.18%    21.92ns   45.63M
1744 // shmtx_w_bare_reads(1thread)                       56.07%    27.04ns   36.98M
1745 // shmtx_rd_pri_reads(1thread)                       69.06%    21.95ns   45.55M
1746 // shmtx_r_bare_reads(1thread)                       56.36%    26.90ns   37.17M
1747 // folly_ticket_reads(1thread)                       57.56%    26.34ns   37.96M
1748 // boost_shared_reads(1thread)                       10.55%   143.72ns    6.96M
1749 // pthrd_rwlock_reads(1thread)                       39.61%    38.28ns   26.12M
1750 // ----------------------------------------------------------------------------
1751 // folly_rwspin_reads(2thread)                                 45.05ns   22.20M
1752 // shmtx_wr_pri_reads(2thread)                      379.98%    11.86ns   84.34M
1753 // shmtx_w_bare_reads(2thread)                      319.27%    14.11ns   70.87M
1754 // shmtx_rd_pri_reads(2thread)                      385.59%    11.68ns   85.59M
1755 // shmtx_r_bare_reads(2thread)                      306.56%    14.70ns   68.04M
1756 // folly_ticket_reads(2thread)                       61.07%    73.78ns   13.55M
1757 // boost_shared_reads(2thread)                       13.54%   332.66ns    3.01M
1758 // pthrd_rwlock_reads(2thread)                       34.22%   131.65ns    7.60M
1759 // ----------------------------------------------------------------------------
1760 // folly_rwspin_reads(4thread)                                 62.19ns   16.08M
1761 // shmtx_wr_pri_reads(4thread)                     1022.82%     6.08ns  164.48M
1762 // shmtx_w_bare_reads(4thread)                      875.37%     7.10ns  140.76M
1763 // shmtx_rd_pri_reads(4thread)                     1060.46%     5.86ns  170.53M
1764 // shmtx_r_bare_reads(4thread)                      879.88%     7.07ns  141.49M
1765 // folly_ticket_reads(4thread)                       64.62%    96.23ns   10.39M
1766 // boost_shared_reads(4thread)                       14.86%   418.49ns    2.39M
1767 // pthrd_rwlock_reads(4thread)                       25.01%   248.65ns    4.02M
1768 // ----------------------------------------------------------------------------
1769 // folly_rwspin_reads(8thread)                                 64.09ns   15.60M
1770 // shmtx_wr_pri_reads(8thread)                     2191.99%     2.92ns  342.03M
1771 // shmtx_w_bare_reads(8thread)                     1804.92%     3.55ns  281.63M
1772 // shmtx_rd_pri_reads(8thread)                     2194.60%     2.92ns  342.44M
1773 // shmtx_r_bare_reads(8thread)                     1800.53%     3.56ns  280.95M
1774 // folly_ticket_reads(8thread)                       54.90%   116.74ns    8.57M
1775 // boost_shared_reads(8thread)                       18.25%   351.24ns    2.85M
1776 // pthrd_rwlock_reads(8thread)                       28.19%   227.31ns    4.40M
1777 // ----------------------------------------------------------------------------
1778 // folly_rwspin_reads(16thread)                                70.06ns   14.27M
1779 // shmtx_wr_pri_reads(16thread)                    4970.09%     1.41ns  709.38M
1780 // shmtx_w_bare_reads(16thread)                    4143.75%     1.69ns  591.44M
1781 // shmtx_rd_pri_reads(16thread)                    5009.31%     1.40ns  714.98M
1782 // shmtx_r_bare_reads(16thread)                    4067.36%     1.72ns  580.54M
1783 // folly_ticket_reads(16thread)                      46.78%   149.77ns    6.68M
1784 // boost_shared_reads(16thread)                      21.67%   323.37ns    3.09M
1785 // pthrd_rwlock_reads(16thread)                      35.05%   199.90ns    5.00M
1786 // ----------------------------------------------------------------------------
1787 // folly_rwspin_reads(32thread)                                58.83ns   17.00M
1788 // shmtx_wr_pri_reads(32thread)                    5158.37%     1.14ns  876.79M
1789 // shmtx_w_bare_reads(32thread)                    4246.03%     1.39ns  721.72M
1790 // shmtx_rd_pri_reads(32thread)                    4845.97%     1.21ns  823.69M
1791 // shmtx_r_bare_reads(32thread)                    4721.44%     1.25ns  802.52M
1792 // folly_ticket_reads(32thread)                      28.40%   207.15ns    4.83M
1793 // boost_shared_reads(32thread)                      17.08%   344.54ns    2.90M
1794 // pthrd_rwlock_reads(32thread)                      30.01%   196.02ns    5.10M
1795 // ----------------------------------------------------------------------------
1796 // folly_rwspin_reads(64thread)                                59.19ns   16.89M
1797 // shmtx_wr_pri_reads(64thread)                    3804.54%     1.56ns  642.76M
1798 // shmtx_w_bare_reads(64thread)                    3625.06%     1.63ns  612.43M
1799 // shmtx_rd_pri_reads(64thread)                    3418.19%     1.73ns  577.48M
1800 // shmtx_r_bare_reads(64thread)                    3416.98%     1.73ns  577.28M
1801 // folly_ticket_reads(64thread)                      30.53%   193.90ns    5.16M
1802 // boost_shared_reads(64thread)                      18.59%   318.47ns    3.14M
1803 // pthrd_rwlock_reads(64thread)                      31.35%   188.81ns    5.30M
1804 // ----------------------------------------------------------------------------
1805 // ----------------------------------------------------------------------------
1806 // folly_rwspin(1thread_all_write)                             23.77ns   42.06M
1807 // shmtx_wr_pri(1thread_all_write)                   85.09%    27.94ns   35.79M
1808 // shmtx_rd_pri(1thread_all_write)                   85.32%    27.87ns   35.89M
1809 // folly_ticket(1thread_all_write)                   88.11%    26.98ns   37.06M
1810 // boost_shared(1thread_all_write)                   16.49%   144.14ns    6.94M
1811 // pthrd_rwlock(1thread_all_write)                   53.99%    44.04ns   22.71M
1812 // pthrd_mutex_(1thread_all_write)                   86.05%    27.63ns   36.20M
1813 // ----------------------------------------------------------------------------
1814 // folly_rwspin(2thread_all_write)                             76.05ns   13.15M
1815 // shmtx_wr_pri(2thread_all_write)                   60.67%   125.35ns    7.98M
1816 // shmtx_rd_pri(2thread_all_write)                   60.36%   125.99ns    7.94M
1817 // folly_ticket(2thread_all_write)                  129.10%    58.91ns   16.98M
1818 // boost_shared(2thread_all_write)                   18.65%   407.74ns    2.45M
1819 // pthrd_rwlock(2thread_all_write)                   40.90%   185.92ns    5.38M
1820 // pthrd_mutex_(2thread_all_write)                  127.37%    59.71ns   16.75M
1821 // ----------------------------------------------------------------------------
1822 // folly_rwspin(4thread_all_write)                            207.17ns    4.83M
1823 // shmtx_wr_pri(4thread_all_write)                  119.42%   173.49ns    5.76M
1824 // shmtx_rd_pri(4thread_all_write)                  117.68%   176.05ns    5.68M
1825 // folly_ticket(4thread_all_write)                  182.39%   113.59ns    8.80M
1826 // boost_shared(4thread_all_write)                   11.98%     1.73us  578.46K
1827 // pthrd_rwlock(4thread_all_write)                   27.50%   753.25ns    1.33M
1828 // pthrd_mutex_(4thread_all_write)                  117.75%   175.95ns    5.68M
1829 // ----------------------------------------------------------------------------
1830 // folly_rwspin(8thread_all_write)                            326.50ns    3.06M
1831 // shmtx_wr_pri(8thread_all_write)                  125.47%   260.22ns    3.84M
1832 // shmtx_rd_pri(8thread_all_write)                  124.73%   261.76ns    3.82M
1833 // folly_ticket(8thread_all_write)                  253.39%   128.85ns    7.76M
1834 // boost_shared(8thread_all_write)                    6.36%     5.13us  194.87K
1835 // pthrd_rwlock(8thread_all_write)                   38.54%   847.09ns    1.18M
1836 // pthrd_mutex_(8thread_all_write)                  166.31%   196.32ns    5.09M
1837 // ----------------------------------------------------------------------------
1838 // folly_rwspin(16thread_all_write)                           729.89ns    1.37M
1839 // shmtx_wr_pri(16thread_all_write)                 219.91%   331.91ns    3.01M
1840 // shmtx_rd_pri(16thread_all_write)                 220.09%   331.62ns    3.02M
1841 // folly_ticket(16thread_all_write)                 390.06%   187.12ns    5.34M
1842 // boost_shared(16thread_all_write)                  10.27%     7.11us  140.72K
1843 // pthrd_rwlock(16thread_all_write)                 113.90%   640.84ns    1.56M
1844 // pthrd_mutex_(16thread_all_write)                 401.97%   181.58ns    5.51M
1845 // ----------------------------------------------------------------------------
1846 // folly_rwspin(32thread_all_write)                             1.55us  645.01K
1847 // shmtx_wr_pri(32thread_all_write)                 415.05%   373.54ns    2.68M
1848 // shmtx_rd_pri(32thread_all_write)                 258.45%   599.88ns    1.67M
1849 // folly_ticket(32thread_all_write)                 525.40%   295.09ns    3.39M
1850 // boost_shared(32thread_all_write)                  20.84%     7.44us  134.45K
1851 // pthrd_rwlock(32thread_all_write)                 254.16%   610.00ns    1.64M
1852 // pthrd_mutex_(32thread_all_write)                 852.51%   181.86ns    5.50M
1853 // ----------------------------------------------------------------------------
1854 // folly_rwspin(64thread_all_write)                             2.03us  492.00K
1855 // shmtx_wr_pri(64thread_all_write)                 517.65%   392.64ns    2.55M
1856 // shmtx_rd_pri(64thread_all_write)                 288.20%   705.24ns    1.42M
1857 // folly_ticket(64thread_all_write)                 638.22%   318.47ns    3.14M
1858 // boost_shared(64thread_all_write)                  27.56%     7.37us  135.61K
1859 // pthrd_rwlock(64thread_all_write)                 326.75%   622.04ns    1.61M
1860 // pthrd_mutex_(64thread_all_write)                1231.57%   165.04ns    6.06M
1861 // ----------------------------------------------------------------------------
1862 // ----------------------------------------------------------------------------
1863 // folly_rwspin(1thread_10pct_write)                           19.39ns   51.58M
1864 // shmtx_wr_pri(1thread_10pct_write)                 93.87%    20.65ns   48.42M
1865 // shmtx_rd_pri(1thread_10pct_write)                 93.60%    20.71ns   48.28M
1866 // folly_ticket(1thread_10pct_write)                 73.75%    26.29ns   38.04M
1867 // boost_shared(1thread_10pct_write)                 12.97%   149.53ns    6.69M
1868 // pthrd_rwlock(1thread_10pct_write)                 44.15%    43.92ns   22.77M
1869 // ----------------------------------------------------------------------------
1870 // folly_rwspin(2thread_10pct_write)                          227.88ns    4.39M
1871 // shmtx_wr_pri(2thread_10pct_write)                321.08%    70.98ns   14.09M
1872 // shmtx_rd_pri(2thread_10pct_write)                280.65%    81.20ns   12.32M
1873 // folly_ticket(2thread_10pct_write)                220.43%   103.38ns    9.67M
1874 // boost_shared(2thread_10pct_write)                 58.78%   387.71ns    2.58M
1875 // pthrd_rwlock(2thread_10pct_write)                112.68%   202.23ns    4.94M
1876 // ----------------------------------------------------------------------------
1877 // folly_rwspin(4thread_10pct_write)                          444.94ns    2.25M
1878 // shmtx_wr_pri(4thread_10pct_write)                470.35%    94.60ns   10.57M
1879 // shmtx_rd_pri(4thread_10pct_write)                349.08%   127.46ns    7.85M
1880 // folly_ticket(4thread_10pct_write)                305.64%   145.58ns    6.87M
1881 // boost_shared(4thread_10pct_write)                 44.43%     1.00us  998.57K
1882 // pthrd_rwlock(4thread_10pct_write)                100.59%   442.31ns    2.26M
1883 // ----------------------------------------------------------------------------
1884 // folly_rwspin(8thread_10pct_write)                          424.67ns    2.35M
1885 // shmtx_wr_pri(8thread_10pct_write)                337.53%   125.82ns    7.95M
1886 // shmtx_rd_pri(8thread_10pct_write)                232.32%   182.79ns    5.47M
1887 // folly_ticket(8thread_10pct_write)                206.59%   205.56ns    4.86M
1888 // boost_shared(8thread_10pct_write)                 19.45%     2.18us  457.90K
1889 // pthrd_rwlock(8thread_10pct_write)                 78.58%   540.42ns    1.85M
1890 // ----------------------------------------------------------------------------
1891 // folly_rwspin(16thread_10pct_write)                         727.04ns    1.38M
1892 // shmtx_wr_pri(16thread_10pct_write)               400.60%   181.49ns    5.51M
1893 // shmtx_rd_pri(16thread_10pct_write)               312.94%   232.33ns    4.30M
1894 // folly_ticket(16thread_10pct_write)               283.67%   256.30ns    3.90M
1895 // boost_shared(16thread_10pct_write)                15.87%     4.58us  218.32K
1896 // pthrd_rwlock(16thread_10pct_write)               131.28%   553.82ns    1.81M
1897 // ----------------------------------------------------------------------------
1898 // folly_rwspin(32thread_10pct_write)                         810.61ns    1.23M
1899 // shmtx_wr_pri(32thread_10pct_write)               429.61%   188.68ns    5.30M
1900 // shmtx_rd_pri(32thread_10pct_write)               321.13%   252.42ns    3.96M
1901 // folly_ticket(32thread_10pct_write)               247.65%   327.32ns    3.06M
1902 // boost_shared(32thread_10pct_write)                 8.34%     9.71us  102.94K
1903 // pthrd_rwlock(32thread_10pct_write)               144.28%   561.85ns    1.78M
1904 // ----------------------------------------------------------------------------
1905 // folly_rwspin(64thread_10pct_write)                           1.10us  912.30K
1906 // shmtx_wr_pri(64thread_10pct_write)               486.68%   225.22ns    4.44M
1907 // shmtx_rd_pri(64thread_10pct_write)               412.96%   265.43ns    3.77M
1908 // folly_ticket(64thread_10pct_write)               280.23%   391.15ns    2.56M
1909 // boost_shared(64thread_10pct_write)                 6.16%    17.79us   56.22K
1910 // pthrd_rwlock(64thread_10pct_write)               198.81%   551.34ns    1.81M
1911 // ----------------------------------------------------------------------------
1912 // ----------------------------------------------------------------------------
1913 // folly_rwspin(1thread_1pct_write)                            19.02ns   52.57M
1914 // shmtx_wr_pri(1thread_1pct_write)                  94.46%    20.14ns   49.66M
1915 // shmtx_w_bare(1thread_1pct_write)                  76.60%    24.83ns   40.27M
1916 // shmtx_rd_pri(1thread_1pct_write)                  93.83%    20.27ns   49.33M
1917 // shmtx_r_bare(1thread_1pct_write)                  77.04%    24.69ns   40.50M
1918 // folly_ticket(1thread_1pct_write)                  72.83%    26.12ns   38.29M
1919 // boost_shared(1thread_1pct_write)                  12.48%   152.44ns    6.56M
1920 // pthrd_rwlock(1thread_1pct_write)                  42.85%    44.39ns   22.53M
1921 // ----------------------------------------------------------------------------
1922 // folly_rwspin(2thread_1pct_write)                           110.63ns    9.04M
1923 // shmtx_wr_pri(2thread_1pct_write)                 442.12%    25.02ns   39.96M
1924 // shmtx_w_bare(2thread_1pct_write)                 374.65%    29.53ns   33.86M
1925 // shmtx_rd_pri(2thread_1pct_write)                 371.08%    29.81ns   33.54M
1926 // shmtx_r_bare(2thread_1pct_write)                 138.02%    80.15ns   12.48M
1927 // folly_ticket(2thread_1pct_write)                 131.34%    84.23ns   11.87M
1928 // boost_shared(2thread_1pct_write)                  30.35%   364.58ns    2.74M
1929 // pthrd_rwlock(2thread_1pct_write)                  95.48%   115.87ns    8.63M
1930 // ----------------------------------------------------------------------------
1931 // folly_rwspin(4thread_1pct_write)                           140.62ns    7.11M
1932 // shmtx_wr_pri(4thread_1pct_write)                 627.13%    22.42ns   44.60M
1933 // shmtx_w_bare(4thread_1pct_write)                 552.94%    25.43ns   39.32M
1934 // shmtx_rd_pri(4thread_1pct_write)                 226.06%    62.21ns   16.08M
1935 // shmtx_r_bare(4thread_1pct_write)                  77.61%   181.19ns    5.52M
1936 // folly_ticket(4thread_1pct_write)                 119.58%   117.60ns    8.50M
1937 // boost_shared(4thread_1pct_write)                  25.36%   554.54ns    1.80M
1938 // pthrd_rwlock(4thread_1pct_write)                  45.55%   308.72ns    3.24M
1939 // ----------------------------------------------------------------------------
1940 // folly_rwspin(8thread_1pct_write)                           166.23ns    6.02M
1941 // shmtx_wr_pri(8thread_1pct_write)                 687.09%    24.19ns   41.33M
1942 // shmtx_w_bare(8thread_1pct_write)                 611.80%    27.17ns   36.80M
1943 // shmtx_rd_pri(8thread_1pct_write)                 140.37%   118.43ns    8.44M
1944 // shmtx_r_bare(8thread_1pct_write)                  80.32%   206.97ns    4.83M
1945 // folly_ticket(8thread_1pct_write)                 117.06%   142.01ns    7.04M
1946 // boost_shared(8thread_1pct_write)                  22.29%   745.67ns    1.34M
1947 // pthrd_rwlock(8thread_1pct_write)                  49.84%   333.55ns    3.00M
1948 // ----------------------------------------------------------------------------
1949 // folly_rwspin(16thread_1pct_write)                          419.79ns    2.38M
1950 // shmtx_wr_pri(16thread_1pct_write)               1397.92%    30.03ns   33.30M
1951 // shmtx_w_bare(16thread_1pct_write)               1324.60%    31.69ns   31.55M
1952 // shmtx_rd_pri(16thread_1pct_write)                278.12%   150.94ns    6.63M
1953 // shmtx_r_bare(16thread_1pct_write)                194.25%   216.11ns    4.63M
1954 // folly_ticket(16thread_1pct_write)                255.38%   164.38ns    6.08M
1955 // boost_shared(16thread_1pct_write)                 33.71%     1.25us  803.01K
1956 // pthrd_rwlock(16thread_1pct_write)                131.96%   318.12ns    3.14M
1957 // ----------------------------------------------------------------------------
1958 // folly_rwspin(32thread_1pct_write)                          395.99ns    2.53M
1959 // shmtx_wr_pri(32thread_1pct_write)               1332.76%    29.71ns   33.66M
1960 // shmtx_w_bare(32thread_1pct_write)               1208.86%    32.76ns   30.53M
1961 // shmtx_rd_pri(32thread_1pct_write)                252.97%   156.54ns    6.39M
1962 // shmtx_r_bare(32thread_1pct_write)                193.79%   204.35ns    4.89M
1963 // folly_ticket(32thread_1pct_write)                173.16%   228.69ns    4.37M
1964 // boost_shared(32thread_1pct_write)                 17.00%     2.33us  429.40K
1965 // pthrd_rwlock(32thread_1pct_write)                129.88%   304.89ns    3.28M
1966 // ----------------------------------------------------------------------------
1967 // folly_rwspin(64thread_1pct_write)                          424.07ns    2.36M
1968 // shmtx_wr_pri(64thread_1pct_write)               1297.89%    32.67ns   30.61M
1969 // shmtx_w_bare(64thread_1pct_write)               1228.88%    34.51ns   28.98M
1970 // shmtx_rd_pri(64thread_1pct_write)                270.40%   156.83ns    6.38M
1971 // shmtx_r_bare(64thread_1pct_write)                218.05%   194.48ns    5.14M
1972 // folly_ticket(64thread_1pct_write)                171.44%   247.36ns    4.04M
1973 // boost_shared(64thread_1pct_write)                 10.60%     4.00us  249.95K
1974 // pthrd_rwlock(64thread_1pct_write)                143.80%   294.91ns    3.39M
1975 // ----------------------------------------------------------------------------
1976 // folly_rwspin(2thr_2lock_50pct_write)                        10.87ns   91.99M
1977 // shmtx_wr_pri(2thr_2lock_50pct_write)              83.71%    12.99ns   77.01M
1978 // shmtx_rd_pri(2thr_2lock_50pct_write)              84.08%    12.93ns   77.34M
1979 // folly_rwspin(4thr_4lock_50pct_write)                         5.32ns  188.12M
1980 // shmtx_wr_pri(4thr_4lock_50pct_write)              82.21%     6.47ns  154.65M
1981 // shmtx_rd_pri(4thr_4lock_50pct_write)              81.20%     6.55ns  152.75M
1982 // folly_rwspin(8thr_8lock_50pct_write)                         2.64ns  379.06M
1983 // shmtx_wr_pri(8thr_8lock_50pct_write)              81.26%     3.25ns  308.03M
1984 // shmtx_rd_pri(8thr_8lock_50pct_write)              80.95%     3.26ns  306.86M
1985 // folly_rwspin(16thr_16lock_50pct_write)                       1.52ns  656.77M
1986 // shmtx_wr_pri(16thr_16lock_50pct_write)            86.24%     1.77ns  566.41M
1987 // shmtx_rd_pri(16thr_16lock_50pct_write)            83.72%     1.82ns  549.82M
1988 // folly_rwspin(32thr_32lock_50pct_write)                       1.19ns  841.03M
1989 // shmtx_wr_pri(32thr_32lock_50pct_write)            85.08%     1.40ns  715.55M
1990 // shmtx_rd_pri(32thr_32lock_50pct_write)            86.44%     1.38ns  727.00M
1991 // folly_rwspin(64thr_64lock_50pct_write)                       1.46ns  684.28M
1992 // shmtx_wr_pri(64thr_64lock_50pct_write)            84.53%     1.73ns  578.43M
1993 // shmtx_rd_pri(64thr_64lock_50pct_write)            82.80%     1.76ns  566.58M
1994 // ----------------------------------------------------------------------------
1995 // folly_rwspin(2thr_2lock_10pct_write)                        10.01ns   99.85M
1996 // shmtx_wr_pri(2thr_2lock_10pct_write)              92.02%    10.88ns   91.88M
1997 // shmtx_rd_pri(2thr_2lock_10pct_write)              92.35%    10.84ns   92.22M
1998 // folly_rwspin(4thr_4lock_10pct_write)                         4.81ns  207.87M
1999 // shmtx_wr_pri(4thr_4lock_10pct_write)              89.32%     5.39ns  185.67M
2000 // shmtx_rd_pri(4thr_4lock_10pct_write)              88.96%     5.41ns  184.93M
2001 // folly_rwspin(8thr_8lock_10pct_write)                         2.39ns  417.62M
2002 // shmtx_wr_pri(8thr_8lock_10pct_write)              91.17%     2.63ns  380.76M
2003 // shmtx_rd_pri(8thr_8lock_10pct_write)              89.53%     2.67ns  373.92M
2004 // folly_rwspin(16thr_16lock_10pct_write)                       1.16ns  860.47M
2005 // shmtx_wr_pri(16thr_16lock_10pct_write)            74.35%     1.56ns  639.77M
2006 // shmtx_rd_pri(16thr_16lock_10pct_write)            91.34%     1.27ns  785.97M
2007 // folly_rwspin(32thr_32lock_10pct_write)                       1.15ns  866.23M
2008 // shmtx_wr_pri(32thr_32lock_10pct_write)            92.32%     1.25ns  799.72M
2009 // shmtx_rd_pri(32thr_32lock_10pct_write)            94.40%     1.22ns  817.71M
2010 // folly_rwspin(64thr_64lock_10pct_write)                       1.41ns  710.54M
2011 // shmtx_wr_pri(64thr_64lock_10pct_write)            94.14%     1.50ns  668.88M
2012 // shmtx_rd_pri(64thr_64lock_10pct_write)            94.80%     1.48ns  673.56M
2013 // ----------------------------------------------------------------------------
2014 // folly_rwspin(2thr_2lock_1pct_write)                          9.58ns  104.36M
2015 // shmtx_wr_pri(2thr_2lock_1pct_write)               92.00%    10.42ns   96.01M
2016 // shmtx_rd_pri(2thr_2lock_1pct_write)               91.79%    10.44ns   95.79M
2017 // folly_rwspin(4thr_4lock_1pct_write)                          4.71ns  212.30M
2018 // shmtx_wr_pri(4thr_4lock_1pct_write)               90.37%     5.21ns  191.85M
2019 // shmtx_rd_pri(4thr_4lock_1pct_write)               89.94%     5.24ns  190.95M
2020 // folly_rwspin(8thr_8lock_1pct_write)                          2.33ns  429.91M
2021 // shmtx_wr_pri(8thr_8lock_1pct_write)               90.67%     2.57ns  389.80M
2022 // shmtx_rd_pri(8thr_8lock_1pct_write)               90.61%     2.57ns  389.55M
2023 // folly_rwspin(16thr_16lock_1pct_write)                        1.10ns  905.23M
2024 // shmtx_wr_pri(16thr_16lock_1pct_write)             91.96%     1.20ns  832.46M
2025 // shmtx_rd_pri(16thr_16lock_1pct_write)             92.29%     1.20ns  835.42M
2026 // folly_rwspin(32thr_32lock_1pct_write)                        1.14ns  879.85M
2027 // shmtx_wr_pri(32thr_32lock_1pct_write)             93.41%     1.22ns  821.86M
2028 // shmtx_rd_pri(32thr_32lock_1pct_write)             94.18%     1.21ns  828.66M
2029 // folly_rwspin(64thr_64lock_1pct_write)                        1.34ns  748.83M
2030 // shmtx_wr_pri(64thr_64lock_1pct_write)             94.39%     1.41ns  706.84M
2031 // shmtx_rd_pri(64thr_64lock_1pct_write)             94.02%     1.42ns  704.06M
2032 // ----------------------------------------------------------------------------
2033 // ----------------------------------------------------------------------------
2034 // folly_rwspin_ping_pong(burn0)                              605.63ns    1.65M
2035 // shmtx_w_bare_ping_pong(burn0)                    102.17%   592.76ns    1.69M
2036 // shmtx_r_bare_ping_pong(burn0)                     88.75%   682.44ns    1.47M
2037 // folly_ticket_ping_pong(burn0)                     63.92%   947.56ns    1.06M
2038 // boost_shared_ping_pong(burn0)                      8.52%     7.11us  140.73K
2039 // pthrd_rwlock_ping_pong(burn0)                      7.88%     7.68us  130.15K
2040 // ----------------------------------------------------------------------------
2041 // folly_rwspin_ping_pong(burn100k)                           727.76ns    1.37M
2042 // shmtx_w_bare_ping_pong(burn100k)                 100.79%   722.09ns    1.38M
2043 // shmtx_r_bare_ping_pong(burn100k)                 101.98%   713.61ns    1.40M
2044 // folly_ticket_ping_pong(burn100k)                 102.80%   707.95ns    1.41M
2045 // boost_shared_ping_pong(burn100k)                  81.49%   893.02ns    1.12M
2046 // pthrd_rwlock_ping_pong(burn100k)                  71.05%     1.02us  976.30K
2047 // ----------------------------------------------------------------------------
2048 // folly_rwspin_ping_pong(burn300k)                             2.11us  473.46K
2049 // shmtx_w_bare_ping_pong(burn300k)                 100.06%     2.11us  473.72K
2050 // shmtx_r_bare_ping_pong(burn300k)                  98.93%     2.13us  468.39K
2051 // folly_ticket_ping_pong(burn300k)                  96.68%     2.18us  457.73K
2052 // boost_shared_ping_pong(burn300k)                  84.72%     2.49us  401.13K
2053 // pthrd_rwlock_ping_pong(burn300k)                  84.62%     2.50us  400.66K
2054 // ----------------------------------------------------------------------------
2055 // folly_rwspin_ping_pong(burn1M)                             709.70ns    1.41M
2056 // shmtx_w_bare_ping_pong(burn1M)                   100.28%   707.73ns    1.41M
2057 // shmtx_r_bare_ping_pong(burn1M)                    99.63%   712.37ns    1.40M
2058 // folly_ticket_ping_pong(burn1M)                   100.09%   709.05ns    1.41M
2059 // boost_shared_ping_pong(burn1M)                    94.09%   754.29ns    1.33M
2060 // pthrd_rwlock_ping_pong(burn1M)                    96.32%   736.82ns    1.36M
2061 // ============================================================================
2062
2063 int main(int argc, char** argv) {
2064   (void)folly_rwspin_reads;
2065   (void)shmtx_wr_pri_reads;
2066   (void)shmtx_w_bare_reads;
2067   (void)shmtx_rd_pri_reads;
2068   (void)shmtx_r_bare_reads;
2069   (void)folly_ticket_reads;
2070   (void)boost_shared_reads;
2071   (void)pthrd_rwlock_reads;
2072   (void)folly_rwspin;
2073   (void)shmtx_wr_pri;
2074   (void)shmtx_w_bare;
2075   (void)shmtx_rd_pri;
2076   (void)shmtx_r_bare;
2077   (void)folly_ticket;
2078   (void)boost_shared;
2079   (void)pthrd_rwlock;
2080   (void)pthrd_mutex_;
2081   (void)folly_rwspin_ping_pong;
2082   (void)shmtx_w_bare_ping_pong;
2083   (void)shmtx_r_bare_ping_pong;
2084   (void)folly_ticket_ping_pong;
2085   (void)boost_shared_ping_pong;
2086   (void)pthrd_rwlock_ping_pong;
2087
2088   testing::InitGoogleTest(&argc, argv);
2089   gflags::ParseCommandLineFlags(&argc, &argv, true);
2090   int rv = RUN_ALL_TESTS();
2091   folly::runBenchmarksOnFlag();
2092   return rv;
2093 }