X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FSharedMutex.h;h=976c6e09f80f9918aa1f599167aea16a00ab6f52;hp=796323dd0f175d67a578cbe43fbd9367ae656c06;hb=dcf8a19c8d08d7e730d3862c88396fdcdfa2813f;hpb=ae099ba2e39ce43cbddc4a41b7a8c611cc515541

diff --git a/folly/SharedMutex.h b/folly/SharedMutex.h
index 796323dd..976c6e09 100644
--- a/folly/SharedMutex.h
+++ b/folly/SharedMutex.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,13 +19,16 @@
 #pragma once

 #include <stdint.h>
+
 #include <atomic>
 #include <thread>
 #include <type_traits>
+
 #include
-#include
 #include
-#include
+#include
+#include

 // SharedMutex is a reader-writer lock.  It is small, very fast, scalable
 // on multi-core, and suitable for use when readers or writers may block.
@@ -182,7 +185,7 @@
 // the shared slots.  If you can conveniently pass state from lock
 // acquisition to release then the fastest mechanism is to std::move
 // the SharedMutex::ReadHolder instance or an SharedMutex::Token (using
-// lock_shared(Token&) and unlock_sahred(Token&)).  The guard or token
+// lock_shared(Token&) and unlock_shared(Token&)).  The guard or token
 // will tell unlock_shared where in deferredReaders[] to look for the
 // deferred lock.  The Token-less version of unlock_shared() works in all
 // cases, but is optimized for the common (no inter-thread handoff) case.
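[Editorial note, not part of the patch: a minimal sketch of the Token handoff described in the comment hunk above. It assumes the folly::SharedMutex alias and the SharedMutexToken typedef declared later in this header; moving the ReadHolder guard achieves the same effect.]

    #include <folly/SharedMutex.h>

    void readSharedState(folly::SharedMutex& mu) {
      folly::SharedMutex::Token token;
      mu.lock_shared(token);    // records which deferredReaders[] slot was used
      // ... read the protected data ...
      mu.unlock_shared(token);  // releases exactly that slot, no search required
    }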
@@ -203,11 +206,22 @@
 //
 // If you have observed by profiling that your SharedMutex-s are getting
 // cache misses on deferredReaders[] due to another SharedMutex user, then
-// you can use the tag type plus the RWDEFERREDLOCK_DECLARE_STATIC_STORAGE
-// macro to create your own instantiation of the type.  The contention
-// threshold (see kNumSharedToStartDeferring) should make this unnecessary
-// in all but the most extreme cases.  Make sure to check that the
-// increased icache and dcache footprint of the tagged result is worth it.
+// you can use the tag type to create your own instantiation of the type.
+// The contention threshold (see kNumSharedToStartDeferring) should make
+// this unnecessary in all but the most extreme cases.  Make sure to check
+// that the increased icache and dcache footprint of the tagged result is
+// worth it.
+
+// SharedMutex's use of thread local storage is as an optimization, so
+// for the case where thread local storage is not supported, define it
+// away.
+#ifndef FOLLY_SHAREDMUTEX_TLS
+#if !FOLLY_MOBILE
+#define FOLLY_SHAREDMUTEX_TLS FOLLY_TLS
+#else
+#define FOLLY_SHAREDMUTEX_TLS
+#endif
+#endif

 namespace folly {

@@ -222,10 +236,11 @@ struct SharedMutexToken {
   uint16_t slot_;
 };

-template <bool ReaderPriority,
-          typename Tag_ = void,
-          template <typename> class Atom = std::atomic,
-          bool BlockImmediately = false>
+template <
+    bool ReaderPriority,
+    typename Tag_ = void,
+    template <typename> class Atom = std::atomic,
+    bool BlockImmediately = false>
 class SharedMutexImpl {
  public:
   static constexpr bool kReaderPriority = ReaderPriority;
@@ -237,7 +252,7 @@ class SharedMutexImpl {
   class UpgradeHolder;
   class WriteHolder;

-  SharedMutexImpl() : state_(0) {}
+  constexpr SharedMutexImpl() noexcept : state_(0) {}

   SharedMutexImpl(const SharedMutexImpl&) = delete;
   SharedMutexImpl(SharedMutexImpl&&) = delete;
@@ -496,7 +511,9 @@
     bool canTimeOut() { return true; }
     bool shouldTimeOut() { return true; }

-    bool doWait(Futex& futex, uint32_t expected, uint32_t waitMask) {
+    bool doWait(Futex& /* futex */,
+                uint32_t /* expected */,
+                uint32_t /* waitMask */) {
       return false;
     }
   };
@@ -546,7 +563,7 @@
   };

   // 32 bits of state
-  Futex state_;
+  Futex state_{};

   // S count needs to be on the end, because we explicitly allow it to
   // underflow.  This can occur while we are in the middle of applying
@@ -707,7 +724,11 @@
   static constexpr uintptr_t kTokenless = 0x1;

   // This is the starting location for Token-less unlock_shared().
-  static FOLLY_TLS uint32_t tls_lastTokenlessSlot;
+  static FOLLY_SHAREDMUTEX_TLS uint32_t tls_lastTokenlessSlot;
+
+  // Last deferred reader slot used.
+  static FOLLY_SHAREDMUTEX_TLS uint32_t tls_lastDeferredReaderSlot;
+
   // Only indexes divisible by kDeferredSeparationFactor are used.
   // If any of those elements points to a SharedMutexImpl, then it
@@ -717,9 +738,8 @@
   typedef Atom<uintptr_t> DeferredReaderSlot;

  private:
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING static DeferredReaderSlot deferredReaders
-      [kMaxDeferredReaders *
-       kDeferredSeparationFactor];
+  alignas(hardware_destructive_interference_size) static DeferredReaderSlot
+      deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor];

   // Performs an exclusive lock, waiting for state_ & waitMask to be
   // zero first
@@ -747,7 +767,7 @@
     }

     uint32_t after = (state & kMayDefer) == 0 ? 0 : kPrevDefer;
-    if (!ReaderPriority || (state & (kMayDefer | kHasS)) == 0) {
+    if (!kReaderPriority || (state & (kMayDefer | kHasS)) == 0) {
       // Block readers immediately, either because we are in write
       // priority mode or because we can acquire the lock in one
      // step.  Note that if state has kHasU, then we are doing an
@@ -788,7 +808,7 @@
       return false;
     }

-    if (ReaderPriority && (state & kHasE) == 0) {
+    if (kReaderPriority && (state & kHasE) == 0) {
       assert((state & kBegunE) != 0);
       if (!state_.compare_exchange_strong(state,
                                           (state & ~kBegunE) | kHasE)) {
@@ -829,6 +849,7 @@
                        WaitContext& ctx) {
 #ifdef RUSAGE_THREAD
     struct rusage usage;
+    std::memset(&usage, 0, sizeof(usage));
     long before = -1;
 #endif
     for (uint32_t yieldCount = 0; yieldCount < kMaxSoftYieldCount;
@@ -971,7 +992,7 @@
           return;
         }
       }
-      asm_pause();
+      asm_volatile_pause();
       if (UNLIKELY(++spinCount >= kMaxSpinCount)) {
         applyDeferredReaders(state, ctx, slot);
         return;
@@ -984,6 +1005,7 @@

 #ifdef RUSAGE_THREAD
     struct rusage usage;
+    std::memset(&usage, 0, sizeof(usage));
     long before = -1;
 #endif
     for (uint32_t yieldCount = 0; yieldCount < kMaxSoftYieldCount;
@@ -1064,121 +1086,7 @@
   }

   template <class WaitContext>
-  bool lockSharedImpl(uint32_t& state, Token* token, WaitContext& ctx) {
-    while (true) {
-      if (UNLIKELY((state & kHasE) != 0) &&
-          !waitForZeroBits(state, kHasE, kWaitingS, ctx) && ctx.canTimeOut()) {
-        return false;
-      }
-
-      uint32_t slot;
-      uintptr_t slotValue = 1; // any non-zero value will do
-
-      bool canAlreadyDefer = (state & kMayDefer) != 0;
-      bool aboveDeferThreshold =
-          (state & kHasS) >= (kNumSharedToStartDeferring - 1) * kIncrHasS;
-      bool drainInProgress = ReaderPriority && (state & kBegunE) != 0;
-      if (canAlreadyDefer || (aboveDeferThreshold && !drainInProgress)) {
-        // starting point for our empty-slot search, can change after
-        // calling waitForZeroBits
-        uint32_t bestSlot =
-            (uint32_t)folly::detail::AccessSpreader<Atom>::current(
-                kMaxDeferredReaders);
-
-        // deferred readers are already enabled, or it is time to
-        // enable them if we can find a slot
-        for (uint32_t i = 0; i < kDeferredSearchDistance; ++i) {
-          slot = bestSlot ^ i;
-          assert(slot < kMaxDeferredReaders);
-          slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
-          if (slotValue == 0) {
-            // found empty slot
-            break;
-          }
-        }
-      }
-
-      if (slotValue != 0) {
-        // not yet deferred, or no empty slots
-        if (state_.compare_exchange_strong(state, state + kIncrHasS)) {
-          // successfully recorded the read lock inline
-          if (token != nullptr) {
-            token->type_ = Token::Type::INLINE_SHARED;
-          }
-          return true;
-        }
-        // state is updated, try again
-        continue;
-      }
-
-      // record that deferred readers might be in use if necessary
-      if ((state & kMayDefer) == 0) {
-        if (!state_.compare_exchange_strong(state, state | kMayDefer)) {
-          // keep going if CAS failed because somebody else set the bit
-          // for us
-          if ((state & (kHasE | kMayDefer)) != kMayDefer) {
-            continue;
-          }
-        }
-        // state = state | kMayDefer;
-      }
-
-      // try to use the slot
-      bool gotSlot = deferredReader(slot)->compare_exchange_strong(
-          slotValue,
-          token == nullptr ? tokenlessSlotValue() : tokenfulSlotValue());
-
-      // If we got the slot, we need to verify that an exclusive lock
-      // didn't happen since we last checked.  If we didn't get the slot we
-      // need to recheck state_ anyway to make sure we don't waste too much
-      // work.  It is also possible that since we checked state_ someone
-      // has acquired and released the write lock, clearing kMayDefer.
-      // Both cases are covered by looking for the readers-possible bit,
-      // because it is off when the exclusive lock bit is set.
-      state = state_.load(std::memory_order_acquire);
-
-      if (!gotSlot) {
-        continue;
-      }
-
-      if (token == nullptr) {
-        tls_lastTokenlessSlot = slot;
-      }
-
-      if ((state & kMayDefer) != 0) {
-        assert((state & kHasE) == 0);
-        // success
-        if (token != nullptr) {
-          token->type_ = Token::Type::DEFERRED_SHARED;
-          token->slot_ = (uint16_t)slot;
-        }
-        return true;
-      }
-
-      // release the slot before retrying
-      if (token == nullptr) {
-        // We can't rely on slot.  Token-less slot values can be freed by
-        // any unlock_shared(), so we need to do the full deferredReader
-        // search during unlock.  Unlike unlock_shared(), we can't trust
-        // kPrevDefer here.  This deferred lock isn't visible to lock()
-        // (that's the whole reason we're undoing it) so there might have
-        // subsequently been an unlock() and lock() with no intervening
-        // transition to deferred mode.
-        if (!tryUnlockTokenlessSharedDeferred()) {
-          unlockSharedInline();
-        }
-      } else {
-        if (!tryUnlockSharedDeferred(slot)) {
-          unlockSharedInline();
-        }
-      }
-
-      // We got here not because the lock was unavailable, but because
-      // we lost a compare-and-swap.  Try-lock is typically allowed to
-      // have spurious failures, but there is no lock efficiency gain
-      // from exploiting that freedom here.
-    }
-  }
+  bool lockSharedImpl(uint32_t& state, Token* token, WaitContext& ctx);

   // Updates the state in/out argument as if the locks were made inline,
   // but does not update state_
@@ -1196,19 +1104,7 @@
     }
   }

-  bool tryUnlockTokenlessSharedDeferred() {
-    auto bestSlot = tls_lastTokenlessSlot;
-    for (uint32_t i = 0; i < kMaxDeferredReaders; ++i) {
-      auto slotPtr = deferredReader(bestSlot ^ i);
-      auto slotValue = slotPtr->load(std::memory_order_relaxed);
-      if (slotValue == tokenlessSlotValue() &&
-          slotPtr->compare_exchange_strong(slotValue, 0)) {
-        tls_lastTokenlessSlot = bestSlot ^ i;
-        return true;
-      }
-    }
-    return false;
-  }
+  bool tryUnlockTokenlessSharedDeferred();

   bool tryUnlockSharedDeferred(uint32_t slot) {
     assert(slot < kMaxDeferredReaders);
@@ -1241,10 +1137,15 @@
  public:
   class ReadHolder {
-   public:
    ReadHolder() : lock_(nullptr) {}

-    explicit ReadHolder(const SharedMutexImpl* lock) : ReadHolder(*lock) {}
+   public:
+    explicit ReadHolder(const SharedMutexImpl* lock)
+        : lock_(const_cast<SharedMutexImpl*>(lock)) {
+      if (lock_) {
+        lock_->lock_shared(token_);
+      }
+    }

     explicit ReadHolder(const SharedMutexImpl& lock)
         : lock_(const_cast<SharedMutexImpl*>(&lock)) {
@@ -1280,8 +1181,13 @@
     ReadHolder& operator=(const ReadHolder& rhs) = delete;

     ~ReadHolder() {
+      unlock();
+    }
+
+    void unlock() {
       if (lock_) {
         lock_->unlock_shared(token_);
+        lock_ = nullptr;
       }
     }
@@ -1293,10 +1199,14 @@
   };

   class UpgradeHolder {
-   public:
    UpgradeHolder() : lock_(nullptr) {}

-    explicit UpgradeHolder(SharedMutexImpl* lock) : UpgradeHolder(*lock) {}
+   public:
+    explicit UpgradeHolder(SharedMutexImpl* lock) : lock_(lock) {
+      if (lock_) {
+        lock_->lock_upgrade();
+      }
+    }

     explicit UpgradeHolder(SharedMutexImpl& lock) : lock_(&lock) {
       lock_->lock_upgrade();
@@ -1322,8 +1232,13 @@
     UpgradeHolder& operator=(const UpgradeHolder& rhs) = delete;

     ~UpgradeHolder() {
+      unlock();
+    }
+
+    void unlock() {
       if (lock_) {
         lock_->unlock_upgrade();
+        lock_ = nullptr;
       }
     }
@@ -1334,10 +1249,14 @@
   };

   class WriteHolder {
-   public:
    WriteHolder() : lock_(nullptr) {}

-    explicit WriteHolder(SharedMutexImpl* lock) : WriteHolder(*lock) {}
+   public:
+    explicit WriteHolder(SharedMutexImpl* lock) : lock_(lock) {
+      if (lock_) {
+        lock_->lock();
+      }
+    }

     explicit WriteHolder(SharedMutexImpl& lock) : lock_(&lock) {
       lock_->lock();
     }
@@ -1350,6 +1269,30 @@
       lock_->unlock_upgrade_and_lock();
     }

+    // README:
+    //
+    // It is intended that WriteHolder(ReadHolder&& rhs) do not exist.
+    //
+    // Shared locks (read) can not safely upgrade to unique locks (write).
+    // That upgrade path is a well-known recipe for deadlock, so we explicitly
+    // disallow it.
+    //
+    // If you need to do a conditional mutation, you have a few options:
+    // 1. Check the condition under a shared lock and release it.
+    //    Then maybe check the condition again under a unique lock and maybe do
+    //    the mutation.
+    // 2. Check the condition once under an upgradeable lock.
+    //    Then maybe upgrade the lock to a unique lock and do the mutation.
+    // 3. Check the condition and maybe perform the mutation under a unique
+    //    lock.
+    //
+    // Relevant upgradeable lock notes:
+    // * At most one upgradeable lock can be held at a time for a given shared
+    //   mutex, just like a unique lock.
+    // * An upgradeable lock may be held concurrently with any number of shared
+    //   locks.
+    // * An upgradeable lock may be upgraded atomically to a unique lock.
+
     WriteHolder(WriteHolder&& rhs) noexcept : lock_(rhs.lock_) {
       rhs.lock_ = nullptr;
     }
@@ -1363,8 +1306,13 @@
     WriteHolder& operator=(const WriteHolder& rhs) = delete;

     ~WriteHolder() {
+      unlock();
+    }
+
+    void unlock() {
       if (lock_) {
         lock_->unlock();
+        lock_ = nullptr;
       }
     }
@@ -1387,16 +1335,189 @@
   }
 };

-#define COMMON_CONCURRENCY_SHARED_MUTEX_DECLARE_STATIC_STORAGE(type) \
-  template <>                                                        \
-  type::DeferredReaderSlot                                           \
-      type::deferredReaders[type::kMaxDeferredReaders *              \
-                            type::kDeferredSeparationFactor] = {};   \
-  template <>                                                        \
-  FOLLY_TLS uint32_t type::tls_lastTokenlessSlot = 0;
-
 typedef SharedMutexImpl<true> SharedMutexReadPriority;
 typedef SharedMutexImpl<false> SharedMutexWritePriority;
 typedef SharedMutexWritePriority SharedMutex;

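[Editorial note, not part of the patch: a minimal sketch of the holder types and of option 2 from the README comment above, using the SharedMutex alias just defined and the WriteHolder(UpgradeHolder&&) promotion constructor shown earlier. CachedValue and its members are hypothetical.]

    #include <utility>
    #include <folly/SharedMutex.h>

    class CachedValue {
     public:
      int read() const {
        folly::SharedMutex::ReadHolder rlock(mutex_);     // shared (read) lock
        return value_;
      }

      void refreshIfStale() {
        folly::SharedMutex::UpgradeHolder ulock(mutex_);  // upgradeable lock
        if (stale_) {
          // Atomic promotion to a unique (write) lock, per option 2 above.
          folly::SharedMutex::WriteHolder wlock(std::move(ulock));
          value_ += 1;  // hypothetical recomputation
          stale_ = false;
        }
      }

     private:
      mutable folly::SharedMutex mutex_;
      int value_{0};
      bool stale_{true};
    };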
+// Prevent the compiler from instantiating these in other translation units.
+// They are instantiated once in SharedMutex.cpp
+extern template class SharedMutexImpl<true>;
+extern template class SharedMutexImpl<false>;
+
+template <
+    bool ReaderPriority,
+    typename Tag_,
+    template <typename> class Atom,
+    bool BlockImmediately>
+alignas(hardware_destructive_interference_size)
+    typename SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+        DeferredReaderSlot
+    SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+        deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor] = {};
+
+template <
+    bool ReaderPriority,
+    typename Tag_,
+    template <typename> class Atom,
+    bool BlockImmediately>
+FOLLY_SHAREDMUTEX_TLS uint32_t
+    SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+        tls_lastTokenlessSlot = 0;
+
+template <
+    bool ReaderPriority,
+    typename Tag_,
+    template <typename> class Atom,
+    bool BlockImmediately>
+FOLLY_SHAREDMUTEX_TLS uint32_t
+    SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+        tls_lastDeferredReaderSlot = 0;
+
+template <
+    bool ReaderPriority,
+    typename Tag_,
+    template <typename> class Atom,
+    bool BlockImmediately>
+bool SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+    tryUnlockTokenlessSharedDeferred() {
+  auto bestSlot = tls_lastTokenlessSlot;
+  for (uint32_t i = 0; i < kMaxDeferredReaders; ++i) {
+    auto slotPtr = deferredReader(bestSlot ^ i);
+    auto slotValue = slotPtr->load(std::memory_order_relaxed);
+    if (slotValue == tokenlessSlotValue() &&
+        slotPtr->compare_exchange_strong(slotValue, 0)) {
+      tls_lastTokenlessSlot = bestSlot ^ i;
+      return true;
+    }
+  }
+  return false;
+}
+
+template <
+    bool ReaderPriority,
+    typename Tag_,
+    template <typename> class Atom,
+    bool BlockImmediately>
+template <class WaitContext>
+bool SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+    lockSharedImpl(uint32_t& state, Token* token, WaitContext& ctx) {
+  while (true) {
+    if (UNLIKELY((state & kHasE) != 0) &&
+        !waitForZeroBits(state, kHasE, kWaitingS, ctx) && ctx.canTimeOut()) {
+      return false;
+    }
+
+    uint32_t slot = tls_lastDeferredReaderSlot;
+    uintptr_t slotValue = 1; // any non-zero value will do
+
+    bool canAlreadyDefer = (state & kMayDefer) != 0;
+    bool aboveDeferThreshold =
+        (state & kHasS) >= (kNumSharedToStartDeferring - 1) * kIncrHasS;
+    bool drainInProgress = ReaderPriority && (state & kBegunE) != 0;
+    if (canAlreadyDefer || (aboveDeferThreshold && !drainInProgress)) {
+      /* Try using the most recent slot first. */
+      slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
+      if (slotValue != 0) {
+        // starting point for our empty-slot search, can change after
+        // calling waitForZeroBits
+        uint32_t bestSlot =
+            (uint32_t)folly::AccessSpreader<Atom>::current(kMaxDeferredReaders);
+
+        // deferred readers are already enabled, or it is time to
+        // enable them if we can find a slot
+        for (uint32_t i = 0; i < kDeferredSearchDistance; ++i) {
+          slot = bestSlot ^ i;
+          assert(slot < kMaxDeferredReaders);
+          slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
+          if (slotValue == 0) {
+            // found empty slot
+            tls_lastDeferredReaderSlot = slot;
+            break;
+          }
+        }
+      }
+    }
+
+    if (slotValue != 0) {
+      // not yet deferred, or no empty slots
+      if (state_.compare_exchange_strong(state, state + kIncrHasS)) {
+        // successfully recorded the read lock inline
+        if (token != nullptr) {
+          token->type_ = Token::Type::INLINE_SHARED;
+        }
+        return true;
+      }
+      // state is updated, try again
+      continue;
+    }
+
+    // record that deferred readers might be in use if necessary
+    if ((state & kMayDefer) == 0) {
+      if (!state_.compare_exchange_strong(state, state | kMayDefer)) {
+        // keep going if CAS failed because somebody else set the bit
+        // for us
+        if ((state & (kHasE | kMayDefer)) != kMayDefer) {
+          continue;
+        }
+      }
+      // state = state | kMayDefer;
+    }
+
+    // try to use the slot
+    bool gotSlot = deferredReader(slot)->compare_exchange_strong(
+        slotValue,
+        token == nullptr ? tokenlessSlotValue() : tokenfulSlotValue());
+
+    // If we got the slot, we need to verify that an exclusive lock
+    // didn't happen since we last checked.  If we didn't get the slot we
+    // need to recheck state_ anyway to make sure we don't waste too much
+    // work.  It is also possible that since we checked state_ someone
+    // has acquired and released the write lock, clearing kMayDefer.
+    // Both cases are covered by looking for the readers-possible bit,
+    // because it is off when the exclusive lock bit is set.
+    state = state_.load(std::memory_order_acquire);
+
+    if (!gotSlot) {
+      continue;
+    }
+
+    if (token == nullptr) {
+      tls_lastTokenlessSlot = slot;
+    }
+
+    if ((state & kMayDefer) != 0) {
+      assert((state & kHasE) == 0);
+      // success
+      if (token != nullptr) {
+        token->type_ = Token::Type::DEFERRED_SHARED;
+        token->slot_ = (uint16_t)slot;
+      }
+      return true;
+    }
+
+    // release the slot before retrying
+    if (token == nullptr) {
+      // We can't rely on slot.  Token-less slot values can be freed by
+      // any unlock_shared(), so we need to do the full deferredReader
+      // search during unlock.  Unlike unlock_shared(), we can't trust
+      // kPrevDefer here.  This deferred lock isn't visible to lock()
+      // (that's the whole reason we're undoing it) so there might have
+      // subsequently been an unlock() and lock() with no intervening
+      // transition to deferred mode.
+      if (!tryUnlockTokenlessSharedDeferred()) {
+        unlockSharedInline();
+      }
+    } else {
+      if (!tryUnlockSharedDeferred(slot)) {
+        unlockSharedInline();
+      }
+    }
+
+    // We got here not because the lock was unavailable, but because
+    // we lost a compare-and-swap.  Try-lock is typically allowed to
+    // have spurious failures, but there is no lock efficiency gain
+    // from exploiting that freedom here.
+  }
+}
+
 } // namespace folly
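[Editorial note, not part of the patch: a minimal sketch of a tagged instantiation, as suggested by the tag-type comment near the top of this header. Because the static members above are now defined as templates in the header, no declaration macro is needed; MyLibTag is a hypothetical name.]

    #include <folly/SharedMutex.h>

    // Hypothetical tag giving this library its own deferredReaders[] storage.
    struct MyLibTag {};
    using MyLibSharedMutex =
        folly::SharedMutexImpl<true /* ReaderPriority */, MyLibTag>;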