/*
- * Copyright 2015 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#pragma once
#include <stdint.h>
+
#include <atomic>
+#include <cstring>
#include <thread>
#include <type_traits>
+
#include <folly/Likely.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
#include <folly/detail/Futex.h>
-#include <sys/resource.h>
+#include <folly/portability/Asm.h>
+#include <folly/portability/SysResource.h>
// SharedMutex is a reader-writer lock. It is small, very fast, scalable
// on multi-core, and suitable for use when readers or writers may block.
// the shared slots. If you can conveniently pass state from lock
// acquisition to release then the fastest mechanism is to std::move
// the SharedMutex::ReadHolder instance or a SharedMutex::Token (using
-// lock_shared(Token&) and unlock_sahred(Token&)). The guard or token
+// lock_shared(Token&) and unlock_shared(Token&)). The guard or token
// will tell unlock_shared where in deferredReaders[] to look for the
// deferred lock. The Token-less version of unlock_shared() works in all
// cases, but is optimized for the common (no inter-thread handoff) case.
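//
// A rough usage sketch of the token handoff (readProtectedData() is a
// hypothetical helper, shown only for illustration):
//
//   folly::SharedMutex mutex;
//   folly::SharedMutex::Token token;
//   mutex.lock_shared(token);     // token remembers the deferred slot, if any
//   readProtectedData();          // read-side critical section
//   mutex.unlock_shared(token);   // releases via the remembered slot
//
// or, with the RAII guard:
//
//   folly::SharedMutex::ReadHolder holder(mutex);
//   readProtectedData();          // holder releases in its destructor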
//
// If you have observed by profiling that your SharedMutex-s are getting
// cache misses on deferredReaders[] due to another SharedMutex user, then
-// you can use the tag type plus the RWDEFERREDLOCK_DECLARE_STATIC_STORAGE
-// macro to create your own instantiation of the type. The contention
-// threshold (see kNumSharedToStartDeferring) should make this unnecessary
-// in all but the most extreme cases. Make sure to check that the
-// increased icache and dcache footprint of the tagged result is worth it.
+// you can use the tag type to create your own instantiation of the type.
+// The contention threshold (see kNumSharedToStartDeferring) should make
+// this unnecessary in all but the most extreme cases. Make sure to check
+// that the increased icache and dcache footprint of the tagged result is
+// worth it.
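+//
+// A hedged sketch of such a tagged instantiation (MyTag and MySharedMutex
+// are hypothetical names):
+//
+//   struct MyTag {};
+//   using MySharedMutex = folly::SharedMutexImpl<true, MyTag>;
+//
+// Each distinct Tag_ produces a separate instantiation with its own static
+// deferredReaders[] array, so it does not contend with other users.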
+
+// SharedMutex's use of thread local storage is an optimization, so
+// for the case where thread local storage is not supported, define it
+// away.
+#ifndef FOLLY_SHAREDMUTEX_TLS
+#if !FOLLY_MOBILE
+#define FOLLY_SHAREDMUTEX_TLS FOLLY_TLS
+#else
+#define FOLLY_SHAREDMUTEX_TLS
+#endif
+#endif
namespace folly {
uint16_t slot_;
};
-template <bool ReaderPriority,
- typename Tag_ = void,
- template <typename> class Atom = std::atomic,
- bool BlockImmediately = false>
+template <
+ bool ReaderPriority,
+ typename Tag_ = void,
+ template <typename> class Atom = std::atomic,
+ bool BlockImmediately = false>
class SharedMutexImpl {
public:
static constexpr bool kReaderPriority = ReaderPriority;
class UpgradeHolder;
class WriteHolder;
- SharedMutexImpl() : state_(0) {}
+ constexpr SharedMutexImpl() noexcept : state_(0) {}
SharedMutexImpl(const SharedMutexImpl&) = delete;
SharedMutexImpl(SharedMutexImpl&&) = delete;
// See https://sourceware.org/bugzilla/show_bug.cgi?id=13690 for a
// description about why this property needs to be explicitly mentioned.
~SharedMutexImpl() {
-#ifndef NDEBUG
- auto state = state_.load(std::memory_order_acquire);
+ auto state = state_.load(std::memory_order_relaxed);
+ if (UNLIKELY((state & kHasS) != 0)) {
+ cleanupTokenlessSharedDeferred(state);
+ }
+#ifndef NDEBUG
// if a futexWait fails to go to sleep because the value has been
// changed, we don't necessarily clean up the wait bits, so it is
// possible they will be set here in a correct system
assert((state & ~(kWaitingAny | kMayDefer)) == 0);
if ((state & kMayDefer) != 0) {
for (uint32_t slot = 0; slot < kMaxDeferredReaders; ++slot) {
- auto slotValue = deferredReader(slot)->load(std::memory_order_acquire);
+ auto slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
assert(!slotValueIsThis(slotValue));
}
}
// kPrevDefer so we can tell if the pre-lock() lock_shared() might
// have deferred
if ((state & (kMayDefer | kPrevDefer)) == 0 ||
- !tryUnlockAnySharedDeferred()) {
+ !tryUnlockTokenlessSharedDeferred()) {
// Matching lock_shared() couldn't have deferred, or the deferred
// lock has already been inlined by applyDeferredReaders()
unlockSharedInline();
void unlock_upgrade_and_lock_shared() {
auto state = (state_ -= kHasU - kIncrHasS);
- assert((state & (kWaitingNotS | kHasSolo)) == 0 && (state & kHasS) != 0);
+ assert((state & (kWaitingNotS | kHasSolo)) == 0);
wakeRegisteredWaiters(state, kWaitingE | kWaitingU);
}
bool canTimeOut() { return true; }
bool shouldTimeOut() { return true; }
- bool doWait(Futex& futex, uint32_t expected, uint32_t waitMask) {
+ bool doWait(Futex& /* futex */,
+ uint32_t /* expected */,
+ uint32_t /* waitMask */) {
return false;
}
};
// 32 bits of state
Futex state_;
+ // S count needs to be on the end, because we explicitly allow it to
+ // underflow. This can occur while we are in the middle of applying
+ // deferred locks (we remove them from deferredReaders[] before
+ // inlining them), or during token-less unlock_shared() if a racing
+ // lock_shared();unlock_shared() moves the deferredReaders slot while
+ // the first unlock_shared() is scanning. The former case is cleaned
+ // up before we finish applying the locks. The latter case can persist
+ // until destruction, when it is cleaned up.
static constexpr uint32_t kIncrHasS = 1 << 10;
static constexpr uint32_t kHasS = ~(kIncrHasS - 1);
// guarantees we won't have inter-L1 contention. We give ourselves
// a factor of 2 on the core count, which should hold us for a couple
// processor generations. deferredReaders[] is 2048 bytes currently.
+ public:
static constexpr uint32_t kMaxDeferredReaders = 64;
static constexpr uint32_t kDeferredSearchDistance = 2;
static constexpr uint32_t kDeferredSeparationFactor = 4;
+ private:
+
static_assert(!(kMaxDeferredReaders & (kMaxDeferredReaders - 1)),
"kMaxDeferredReaders must be a power of 2");
static_assert(!(kDeferredSearchDistance & (kDeferredSearchDistance - 1)),
static constexpr uintptr_t kTokenless = 0x1;
// This is the starting location for Token-less unlock_shared().
- static FOLLY_TLS uint32_t tls_lastTokenlessSlot;
+ static FOLLY_SHAREDMUTEX_TLS uint32_t tls_lastTokenlessSlot;
+
+ // Last deferred reader slot used.
+ static FOLLY_SHAREDMUTEX_TLS uint32_t tls_lastDeferredReaderSlot;
+
// Only indexes divisible by kDeferredSeparationFactor are used.
// If any of those elements points to a SharedMutexImpl, then it
// should be considered that there is a shared lock on that instance.
// See kTokenless.
+ public:
typedef Atom<uintptr_t> DeferredReaderSlot;
- static DeferredReaderSlot deferredReaders
+
+ private:
+ FOLLY_ALIGN_TO_AVOID_FALSE_SHARING static DeferredReaderSlot deferredReaders
[kMaxDeferredReaders *
- kDeferredSeparationFactor] FOLLY_ALIGN_TO_AVOID_FALSE_SHARING;
+ kDeferredSeparationFactor];
// Performs an exclusive lock, waiting for state_ & waitMask to be
// zero first
}
uint32_t after = (state & kMayDefer) == 0 ? 0 : kPrevDefer;
- if (!ReaderPriority || (state & (kMayDefer | kHasS)) == 0) {
+ if (!kReaderPriority || (state & (kMayDefer | kHasS)) == 0) {
// Block readers immediately, either because we are in write
// priority mode or because we can acquire the lock in one
// step. Note that if state has kHasU, then we are doing an
return false;
}
- if (ReaderPriority && (state & kHasE) == 0) {
+ if (kReaderPriority && (state & kHasE) == 0) {
assert((state & kBegunE) != 0);
if (!state_.compare_exchange_strong(state,
(state & ~kBegunE) | kHasE)) {
if ((state & goal) == 0) {
return true;
}
-#if FOLLY_X64
- asm volatile("pause");
-#endif
+ asm_volatile_pause();
++spinCount;
if (UNLIKELY(spinCount >= kMaxSpinCount)) {
return ctx.canBlock() &&
WaitContext& ctx) {
#ifdef RUSAGE_THREAD
struct rusage usage;
+ std::memset(&usage, 0, sizeof(usage));
long before = -1;
#endif
for (uint32_t yieldCount = 0; yieldCount < kMaxSoftYieldCount;
return;
}
}
-#if FOLLY_X64
- asm("pause");
-#endif
+ asm_volatile_pause();
if (UNLIKELY(++spinCount >= kMaxSpinCount)) {
applyDeferredReaders(state, ctx, slot);
return;
#ifdef RUSAGE_THREAD
struct rusage usage;
+ std::memset(&usage, 0, sizeof(usage));
long before = -1;
#endif
for (uint32_t yieldCount = 0; yieldCount < kMaxSoftYieldCount;
}
template <class WaitContext>
- bool lockSharedImpl(uint32_t& state, Token* token, WaitContext& ctx) {
- while (true) {
- if (UNLIKELY((state & kHasE) != 0) &&
- !waitForZeroBits(state, kHasE, kWaitingS, ctx) && ctx.canTimeOut()) {
- return false;
- }
-
- uint32_t slot;
- uintptr_t slotValue = 1; // any non-zero value will do
-
- bool canAlreadyDefer = (state & kMayDefer) != 0;
- bool aboveDeferThreshold =
- (state & kHasS) >= (kNumSharedToStartDeferring - 1) * kIncrHasS;
- bool drainInProgress = ReaderPriority && (state & kBegunE) != 0;
- if (canAlreadyDefer || (aboveDeferThreshold && !drainInProgress)) {
- // starting point for our empty-slot search, can change after
- // calling waitForZeroBits
- uint32_t bestSlot =
- (uint32_t)folly::detail::AccessSpreader<Atom>::current(
- kMaxDeferredReaders);
-
- // deferred readers are already enabled, or it is time to
- // enable them if we can find a slot
- for (uint32_t i = 0; i < kDeferredSearchDistance; ++i) {
- slot = bestSlot ^ i;
- assert(slot < kMaxDeferredReaders);
- slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
- if (slotValue == 0) {
- // found empty slot
- break;
- }
- }
- }
-
- if (slotValue != 0) {
- // not yet deferred, or no empty slots
- if (state_.compare_exchange_strong(state, state + kIncrHasS)) {
- // successfully recorded the read lock inline
- if (token != nullptr) {
- token->type_ = Token::Type::INLINE_SHARED;
- }
- return true;
- }
- // state is updated, try again
- continue;
- }
-
- // record that deferred readers might be in use if necessary
- if ((state & kMayDefer) == 0) {
- if (!state_.compare_exchange_strong(state, state | kMayDefer)) {
- // keep going if CAS failed because somebody else set the bit
- // for us
- if ((state & (kHasE | kMayDefer)) != kMayDefer) {
- continue;
- }
- }
- // state = state | kMayDefer;
- }
-
- // try to use the slot
- bool gotSlot = deferredReader(slot)->compare_exchange_strong(
- slotValue,
- token == nullptr ? tokenlessSlotValue() : tokenfulSlotValue());
-
- // If we got the slot, we need to verify that an exclusive lock
- // didn't happen since we last checked. If we didn't get the slot we
- // need to recheck state_ anyway to make sure we don't waste too much
- // work. It is also possible that since we checked state_ someone
- // has acquired and released the write lock, clearing kMayDefer.
- // Both cases are covered by looking for the readers-possible bit,
- // because it is off when the exclusive lock bit is set.
- state = state_.load(std::memory_order_acquire);
-
- if (!gotSlot) {
- continue;
- }
-
- if (token == nullptr) {
- tls_lastTokenlessSlot = slot;
- }
-
- if ((state & kMayDefer) != 0) {
- assert((state & kHasE) == 0);
- // success
- if (token != nullptr) {
- token->type_ = Token::Type::DEFERRED_SHARED;
- token->slot_ = (uint16_t)slot;
- }
- return true;
- }
-
- // release the slot before retrying
- if (token == nullptr) {
- // We can't rely on slot. Token-less slot values can be freed by
- // any unlock_shared(), so we need to do the full deferredReader
- // search during unlock. Unlike unlock_shared(), we can't trust
- // kPrevDefer here. This deferred lock isn't visible to lock()
- // (that's the whole reason we're undoing it) so there might have
- // subsequently been an unlock() and lock() with no intervening
- // transition to deferred mode.
- if (!tryUnlockAnySharedDeferred()) {
- unlockSharedInline();
- }
- } else {
- if (!tryUnlockSharedDeferred(slot)) {
- unlockSharedInline();
- }
- }
-
- // We got here not because the lock was unavailable, but because
- // we lost a compare-and-swap. Try-lock is typically allowed to
- // have spurious failures, but there is no lock efficiency gain
- // from exploiting that freedom here.
- }
- }
+ bool lockSharedImpl(uint32_t& state, Token* token, WaitContext& ctx);
- bool tryUnlockAnySharedDeferred() {
- auto bestSlot = tls_lastTokenlessSlot;
+ // Updates the state in/out argument as if the locks were made inline,
+ // but does not update state_
+ void cleanupTokenlessSharedDeferred(uint32_t& state) {
for (uint32_t i = 0; i < kMaxDeferredReaders; ++i) {
- auto slotPtr = deferredReader(bestSlot ^ i);
+ auto slotPtr = deferredReader(i);
auto slotValue = slotPtr->load(std::memory_order_relaxed);
- if (slotValue == tokenlessSlotValue() &&
- slotPtr->compare_exchange_strong(slotValue, 0)) {
- tls_lastTokenlessSlot = bestSlot ^ i;
- return true;
+ if (slotValue == tokenlessSlotValue()) {
+ slotPtr->store(0, std::memory_order_relaxed);
+ state += kIncrHasS;
+ if ((state & kHasS) == 0) {
+ break;
+ }
}
}
- return false;
}
+ bool tryUnlockTokenlessSharedDeferred();
+
bool tryUnlockSharedDeferred(uint32_t slot) {
assert(slot < kMaxDeferredReaders);
auto slotValue = tokenfulSlotValue();
uint32_t unlockSharedInline() {
uint32_t state = (state_ -= kIncrHasS);
- assert((state & (kHasE | kBegunE)) != 0 || state < state + kIncrHasS);
+ assert((state & (kHasE | kBegunE | kMayDefer)) != 0 ||
+ state < state + kIncrHasS);
if ((state & kHasS) == 0) {
// Only the second half of lock() can be blocked by a non-zero
// reader count, so that's the only thing we need to wake
public:
class ReadHolder {
- public:
ReadHolder() : lock_(nullptr) {}
- explicit ReadHolder(const SharedMutexImpl* lock) : ReadHolder(*lock) {}
+ public:
+ explicit ReadHolder(const SharedMutexImpl* lock)
+ : lock_(const_cast<SharedMutexImpl*>(lock)) {
+ if (lock_) {
+ lock_->lock_shared(token_);
+ }
+ }
explicit ReadHolder(const SharedMutexImpl& lock)
: lock_(const_cast<SharedMutexImpl*>(&lock)) {
ReadHolder& operator=(const ReadHolder& rhs) = delete;
~ReadHolder() {
+ unlock();
+ }
+
+ void unlock() {
if (lock_) {
lock_->unlock_shared(token_);
+ lock_ = nullptr;
}
}
};
class UpgradeHolder {
- public:
UpgradeHolder() : lock_(nullptr) {}
- explicit UpgradeHolder(SharedMutexImpl* lock) : UpgradeHolder(*lock) {}
+ public:
+ explicit UpgradeHolder(SharedMutexImpl* lock) : lock_(lock) {
+ if (lock_) {
+ lock_->lock_upgrade();
+ }
+ }
explicit UpgradeHolder(SharedMutexImpl& lock) : lock_(&lock) {
lock_->lock_upgrade();
UpgradeHolder& operator=(const UpgradeHolder& rhs) = delete;
~UpgradeHolder() {
+ unlock();
+ }
+
+ void unlock() {
if (lock_) {
lock_->unlock_upgrade();
+ lock_ = nullptr;
}
}
};
class WriteHolder {
- public:
WriteHolder() : lock_(nullptr) {}
- explicit WriteHolder(SharedMutexImpl* lock) : WriteHolder(*lock) {}
+ public:
+ explicit WriteHolder(SharedMutexImpl* lock) : lock_(lock) {
+ if (lock_) {
+ lock_->lock();
+ }
+ }
explicit WriteHolder(SharedMutexImpl& lock) : lock_(&lock) {
lock_->lock();
lock_->unlock_upgrade_and_lock();
}
+ // README:
+ //
+ // It is intended that WriteHolder(ReadHolder&& rhs) not exist.
+ //
+ // Shared locks (read) cannot safely upgrade to unique locks (write).
+ // That upgrade path is a well-known recipe for deadlock, so we explicitly
+ // disallow it.
+ //
+ // If you need to do a conditional mutation, you have a few options:
+ // 1. Check the condition under a shared lock and release it.
+ // Then maybe check the condition again under a unique lock and maybe do
+ // the mutation.
+ // 2. Check the condition once under an upgradeable lock.
+ // Then maybe upgrade the lock to a unique lock and do the mutation.
+ // 3. Check the condition and maybe perform the mutation under a unique
+ // lock.
+ //
+ // Relevant upgradeable lock notes:
+ // * At most one upgradeable lock can be held at a time for a given shared
+ // mutex, just like a unique lock.
+ // * An upgradeable lock may be held concurrently with any number of shared
+ // locks.
+ // * An upgradeable lock may be upgraded atomically to a unique lock.
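+ //
+ // A hedged sketch of option 2 (needsUpdate() and applyUpdate() are
+ // hypothetical helpers):
+ //
+ //   SharedMutex::UpgradeHolder ulock(mutex);
+ //   if (needsUpdate()) {
+ //     SharedMutex::WriteHolder wlock(std::move(ulock)); // atomic upgrade
+ //     applyUpdate();
+ //   }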
+
WriteHolder(WriteHolder&& rhs) noexcept : lock_(rhs.lock_) {
rhs.lock_ = nullptr;
}
WriteHolder& operator=(const WriteHolder& rhs) = delete;
~WriteHolder() {
+ unlock();
+ }
+
+ void unlock() {
if (lock_) {
lock_->unlock();
+ lock_ = nullptr;
}
}
friend void acquireReadWrite(SharedMutexImpl& lock) { lock.lock(); }
friend void releaseRead(SharedMutexImpl& lock) { lock.unlock_shared(); }
friend void releaseReadWrite(SharedMutexImpl& lock) { lock.unlock(); }
+ friend bool acquireRead(SharedMutexImpl& lock, unsigned int ms) {
+ return lock.try_lock_shared_for(std::chrono::milliseconds(ms));
+ }
+ friend bool acquireReadWrite(SharedMutexImpl& lock, unsigned int ms) {
+ return lock.try_lock_for(std::chrono::milliseconds(ms));
+ }
};
-#define COMMON_CONCURRENCY_SHARED_MUTEX_DECLARE_STATIC_STORAGE(type) \
- template <> \
- type::DeferredReaderSlot \
- type::deferredReaders[type::kMaxDeferredReaders * \
- type::kDeferredSeparationFactor] = {}; \
- template <> \
- FOLLY_TLS uint32_t type::tls_lastTokenlessSlot = 0;
-
typedef SharedMutexImpl<true> SharedMutexReadPriority;
typedef SharedMutexImpl<false> SharedMutexWritePriority;
typedef SharedMutexWritePriority SharedMutex;
+// Prevent the compiler from instantiating these in other translation units.
+// They are instantiated once in SharedMutex.cpp
+extern template class SharedMutexImpl<true>;
+extern template class SharedMutexImpl<false>;
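+// (Illustrative sketch of the matching explicit instantiations the .cpp
+// file would provide; an assumption about its contents, not a quote:
+//   template class folly::SharedMutexImpl<true>;
+//   template class folly::SharedMutexImpl<false>;)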
+
+template <
+ bool ReaderPriority,
+ typename Tag_,
+ template <typename> class Atom,
+ bool BlockImmediately>
+typename SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+ DeferredReaderSlot
+ SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+ deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor] =
+ {};
+
+template <
+ bool ReaderPriority,
+ typename Tag_,
+ template <typename> class Atom,
+ bool BlockImmediately>
+FOLLY_SHAREDMUTEX_TLS uint32_t
+ SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+ tls_lastTokenlessSlot = 0;
+
+template <
+ bool ReaderPriority,
+ typename Tag_,
+ template <typename> class Atom,
+ bool BlockImmediately>
+FOLLY_SHAREDMUTEX_TLS uint32_t
+ SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+ tls_lastDeferredReaderSlot = 0;
+
+template <
+ bool ReaderPriority,
+ typename Tag_,
+ template <typename> class Atom,
+ bool BlockImmediately>
+bool SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+ tryUnlockTokenlessSharedDeferred() {
+ auto bestSlot = tls_lastTokenlessSlot;
+ for (uint32_t i = 0; i < kMaxDeferredReaders; ++i) {
+ auto slotPtr = deferredReader(bestSlot ^ i);
+ auto slotValue = slotPtr->load(std::memory_order_relaxed);
+ if (slotValue == tokenlessSlotValue() &&
+ slotPtr->compare_exchange_strong(slotValue, 0)) {
+ tls_lastTokenlessSlot = bestSlot ^ i;
+ return true;
+ }
+ }
+ return false;
+}
+
+template <
+ bool ReaderPriority,
+ typename Tag_,
+ template <typename> class Atom,
+ bool BlockImmediately>
+template <class WaitContext>
+bool SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+ lockSharedImpl(uint32_t& state, Token* token, WaitContext& ctx) {
+ while (true) {
+ if (UNLIKELY((state & kHasE) != 0) &&
+ !waitForZeroBits(state, kHasE, kWaitingS, ctx) && ctx.canTimeOut()) {
+ return false;
+ }
+
+ uint32_t slot = tls_lastDeferredReaderSlot;
+ uintptr_t slotValue = 1; // any non-zero value will do
+
+ bool canAlreadyDefer = (state & kMayDefer) != 0;
+ bool aboveDeferThreshold =
+ (state & kHasS) >= (kNumSharedToStartDeferring - 1) * kIncrHasS;
+ bool drainInProgress = ReaderPriority && (state & kBegunE) != 0;
+ if (canAlreadyDefer || (aboveDeferThreshold && !drainInProgress)) {
+ /* Try using the most recent slot first. */
+ slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
+ if (slotValue != 0) {
+ // starting point for our empty-slot search, can change after
+ // calling waitForZeroBits
+ uint32_t bestSlot =
+ (uint32_t)folly::AccessSpreader<Atom>::current(kMaxDeferredReaders);
+
+ // deferred readers are already enabled, or it is time to
+ // enable them if we can find a slot
+ for (uint32_t i = 0; i < kDeferredSearchDistance; ++i) {
+ slot = bestSlot ^ i;
+ assert(slot < kMaxDeferredReaders);
+ slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
+ if (slotValue == 0) {
+ // found empty slot
+ tls_lastDeferredReaderSlot = slot;
+ break;
+ }
+ }
+ }
+ }
+
+ if (slotValue != 0) {
+ // not yet deferred, or no empty slots
+ if (state_.compare_exchange_strong(state, state + kIncrHasS)) {
+ // successfully recorded the read lock inline
+ if (token != nullptr) {
+ token->type_ = Token::Type::INLINE_SHARED;
+ }
+ return true;
+ }
+ // state is updated, try again
+ continue;
+ }
+
+ // record that deferred readers might be in use if necessary
+ if ((state & kMayDefer) == 0) {
+ if (!state_.compare_exchange_strong(state, state | kMayDefer)) {
+ // keep going if CAS failed because somebody else set the bit
+ // for us
+ if ((state & (kHasE | kMayDefer)) != kMayDefer) {
+ continue;
+ }
+ }
+ // state = state | kMayDefer;
+ }
+
+ // try to use the slot
+ bool gotSlot = deferredReader(slot)->compare_exchange_strong(
+ slotValue,
+ token == nullptr ? tokenlessSlotValue() : tokenfulSlotValue());
+
+ // If we got the slot, we need to verify that an exclusive lock
+ // didn't happen since we last checked. If we didn't get the slot we
+ // need to recheck state_ anyway to make sure we don't waste too much
+ // work. It is also possible that since we checked state_ someone
+ // has acquired and released the write lock, clearing kMayDefer.
+ // Both cases are covered by looking for the readers-possible bit,
+ // because it is off when the exclusive lock bit is set.
+ state = state_.load(std::memory_order_acquire);
+
+ if (!gotSlot) {
+ continue;
+ }
+
+ if (token == nullptr) {
+ tls_lastTokenlessSlot = slot;
+ }
+
+ if ((state & kMayDefer) != 0) {
+ assert((state & kHasE) == 0);
+ // success
+ if (token != nullptr) {
+ token->type_ = Token::Type::DEFERRED_SHARED;
+ token->slot_ = (uint16_t)slot;
+ }
+ return true;
+ }
+
+ // release the slot before retrying
+ if (token == nullptr) {
+ // We can't rely on slot. Token-less slot values can be freed by
+ // any unlock_shared(), so we need to do the full deferredReader
+ // search during unlock. Unlike unlock_shared(), we can't trust
+ // kPrevDefer here. This deferred lock isn't visible to lock()
+ // (that's the whole reason we're undoing it) so there might have
+ // subsequently been an unlock() and lock() with no intervening
+ // transition to deferred mode.
+ if (!tryUnlockTokenlessSharedDeferred()) {
+ unlockSharedInline();
+ }
+ } else {
+ if (!tryUnlockSharedDeferred(slot)) {
+ unlockSharedInline();
+ }
+ }
+
+ // We got here not because the lock was unavailable, but because
+ // we lost a compare-and-swap. Try-lock is typically allowed to
+ // have spurious failures, but there is no lock efficiency gain
+ // from exploiting that freedom here.
+ }
+}
+
} // namespace folly