From 7334598fed1fa92effebc904ec2a44d7e800c2b6 Mon Sep 17 00:00:00 2001
From: Nathan Bronson
Date: Wed, 22 Jul 2015 15:14:58 -0700
Subject: [PATCH] fix racy assert in SharedMutex

Summary: SharedMutex purposely allows the inline reader count to
underflow in some situations while preserving proper locking behavior
(the inline reader count is only trusted if all deferred locks have
been applied), but there was an additional way that this could occur
that wasn't documented or allowed by the asserts. The effect was a
false positive assert in rare conditions or the possibility of losing
track of a deferred lock slot. This diff fixes both the over-aggressive
assert and the potential loss of the shared slot. If the assert didn't
fire for you then this diff won't change the correctness of your
program.

Reviewed By: @yfeldblum

Differential Revision: D2269018
---
 folly/SharedMutex.h            | 44 +++++++++++++++++++++++++++-------
 folly/test/SharedMutexTest.cpp |  4 ++--
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/folly/SharedMutex.h b/folly/SharedMutex.h
index 69748161..37b39765 100644
--- a/folly/SharedMutex.h
+++ b/folly/SharedMutex.h
@@ -253,16 +253,19 @@ class SharedMutexImpl {
   // See https://sourceware.org/bugzilla/show_bug.cgi?id=13690 for a
   // description about why this property needs to be explicitly mentioned.
   ~SharedMutexImpl() {
-#ifndef NDEBUG
-    auto state = state_.load(std::memory_order_acquire);
+    auto state = state_.load(std::memory_order_relaxed);
+    if (UNLIKELY((state & kHasS) != 0)) {
+      cleanupTokenlessSharedDeferred(state);
+    }
 
+#ifndef NDEBUG
     // if a futexWait fails to go to sleep because the value has been
     // changed, we don't necessarily clean up the wait bits, so it is
     // possible they will be set here in a correct system
     assert((state & ~(kWaitingAny | kMayDefer)) == 0);
     if ((state & kMayDefer) != 0) {
       for (uint32_t slot = 0; slot < kMaxDeferredReaders; ++slot) {
-        auto slotValue = deferredReader(slot)->load(std::memory_order_acquire);
+        auto slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
         assert(!slotValueIsThis(slotValue));
       }
     }
@@ -361,7 +364,7 @@
     // kPrevDefer so we can tell if the pre-lock() lock_shared() might
     // have deferred
     if ((state & (kMayDefer | kPrevDefer)) == 0 ||
-        !tryUnlockAnySharedDeferred()) {
+        !tryUnlockTokenlessSharedDeferred()) {
       // Matching lock_shared() couldn't have deferred, or the deferred
       // lock has already been inlined by applyDeferredReaders()
       unlockSharedInline();
@@ -441,7 +444,7 @@
 
   void unlock_upgrade_and_lock_shared() {
     auto state = (state_ -= kHasU - kIncrHasS);
-    assert((state & (kWaitingNotS | kHasSolo)) == 0 && (state & kHasS) != 0);
+    assert((state & (kWaitingNotS | kHasSolo)) == 0);
     wakeRegisteredWaiters(state, kWaitingE | kWaitingU);
   }
 
@@ -545,6 +548,14 @@
 
   // 32 bits of state
   Futex state_;
 
+  // S count needs to be on the end, because we explicitly allow it to
+  // underflow. This can occur while we are in the middle of applying
+  // deferred locks (we remove them from deferredReaders[] before
+  // inlining them), or during token-less unlock_shared() if a racing
+  // lock_shared();unlock_shared() moves the deferredReaders slot while
+  // the first unlock_shared() is scanning. The former case is cleaned
+  // up before we finish applying the locks. The latter case can persist
+  // until destruction, when it is cleaned up.
   static constexpr uint32_t kIncrHasS = 1 << 10;
   static constexpr uint32_t kHasS = ~(kIncrHasS - 1);
@@ -1147,7 +1158,7 @@
       // (that's the whole reason we're undoing it) so there might have
       // subsequently been an unlock() and lock() with no intervening
       // transition to deferred mode.
-      if (!tryUnlockAnySharedDeferred()) {
+      if (!tryUnlockTokenlessSharedDeferred()) {
         unlockSharedInline();
       }
     } else {
@@ -1163,7 +1174,23 @@
     }
   }
 
-  bool tryUnlockAnySharedDeferred() {
+  // Updates the state in/out argument as if the locks were made inline,
+  // but does not update state_
+  void cleanupTokenlessSharedDeferred(uint32_t& state) {
+    for (uint32_t i = 0; i < kMaxDeferredReaders; ++i) {
+      auto slotPtr = deferredReader(i);
+      auto slotValue = slotPtr->load(std::memory_order_relaxed);
+      if (slotValue == tokenlessSlotValue()) {
+        slotPtr->store(0, std::memory_order_relaxed);
+        state += kIncrHasS;
+        if ((state & kHasS) == 0) {
+          break;
+        }
+      }
+    }
+  }
+
+  bool tryUnlockTokenlessSharedDeferred() {
     auto bestSlot = tls_lastTokenlessSlot;
     for (uint32_t i = 0; i < kMaxDeferredReaders; ++i) {
       auto slotPtr = deferredReader(bestSlot ^ i);
@@ -1185,7 +1212,8 @@
 
   uint32_t unlockSharedInline() {
     uint32_t state = (state_ -= kIncrHasS);
-    assert((state & (kHasE | kBegunE)) != 0 || state < state + kIncrHasS);
+    assert((state & (kHasE | kBegunE | kMayDefer)) != 0 ||
+           state < state + kIncrHasS);
     if ((state & kHasS) == 0) {
       // Only the second half of lock() can be blocked by a non-zero
       // reader count, so that's the only thing we need to wake
diff --git a/folly/test/SharedMutexTest.cpp b/folly/test/SharedMutexTest.cpp
index 3b374f09..276d5711 100644
--- a/folly/test/SharedMutexTest.cpp
+++ b/folly/test/SharedMutexTest.cpp
@@ -1205,13 +1205,13 @@ TEST(SharedMutex, deterministic_remote_read_prio) {
 }
 
 TEST(SharedMutex, remote_write_prio) {
-  for (int pass = 0; pass < 1; ++pass) {
+  for (int pass = 0; pass < 10; ++pass) {
     runRemoteUnlock(100000, 0.1, 0.1, 5, 5);
   }
 }
 
 TEST(SharedMutex, remote_read_prio) {
-  for (int pass = 0; pass < 1; ++pass) {
+  for (int pass = 0; pass < 100; ++pass) {
     runRemoteUnlock(100000, 0.1, 0.1, 5, 5);
   }
 }
-- 
2.34.1
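
Note for readers of this patch (not part of the commit itself): the race
described in the summary needs nothing more exotic than plain token-less
shared locking from concurrent threads. Below is a minimal stress-loop
sketch using only the public folly::SharedMutex API (the token-less
lock_shared()/unlock_shared() overloads); the thread and iteration counts
are arbitrary illustration values, not taken from the patch.

#include <folly/SharedMutex.h>

#include <thread>
#include <vector>

int main() {
  folly::SharedMutexWritePriority mu;
  std::vector<std::thread> threads;
  for (int t = 0; t < 4; ++t) {            // arbitrary thread count
    threads.emplace_back([&] {
      for (int i = 0; i < 1000000; ++i) {  // arbitrary iteration count
        mu.lock_shared();   // token-less: the lock may be recorded in a
                            // deferredReaders[] slot rather than in state_
        mu.unlock_shared(); // scans deferredReaders[] for that slot; if a
                            // racing lock_shared()/unlock_shared() moved
                            // it, the inline reader count is decremented
                            // instead and is allowed to underflow
      }
    });
  }
  for (auto& th : threads) {
    th.join();
  }
  return 0;
}

Before this diff, a loop like this could, in rare interleavings, leave a
token-less deferred slot unreclaimed until destruction and trip the
destructor assert in debug builds; with the diff applied, the destructor
sweeps such slots via cleanupTokenlessSharedDeferred() before asserting.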