X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FRWSpinLock.h;h=8edd6b3279679d82c68e0c8614b594e539939c8d;hb=2ca10eeefdc1231a20fb1cb75dc3e8d2c5fa70ba;hp=ec040756564ae7aa4478dbc08cdd152ae76a03a8;hpb=27494a20393fa45072e7d526d358835f3abe312a;p=folly.git

diff --git a/folly/RWSpinLock.h b/folly/RWSpinLock.h
index ec040756..8edd6b32 100644
--- a/folly/RWSpinLock.h
+++ b/folly/RWSpinLock.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,22 @@
  */
 
 /*
+ * N.B. You most likely do _not_ want to use RWSpinLock or any other
+ * kind of spinlock.  Use SharedMutex instead.
+ *
+ * In short, spinlocks in preemptive multi-tasking operating systems
+ * have serious problems and fast mutexes like SharedMutex are almost
+ * certainly the better choice, because letting the OS scheduler put a
+ * thread to sleep is better for system responsiveness and throughput
+ * than wasting a timeslice repeatedly querying a lock held by a
+ * thread that's blocked, and you can't prevent userspace
+ * programs blocking.
+ *
+ * Spinlocks in an operating system kernel make much more sense than
+ * they do in userspace.
+ *
+ * -------------------------------------------------------------------
+ *
  * Two Read-Write spin lock implementations.
  *
  *  Ref: http://locklessinc.com/articles/locks
@@ -24,12 +40,14 @@
  *  are very compact (4/8 bytes), so are suitable for per-instance
  *  based locking, particularly when contention is not expected.
  *
- *  In most cases, RWSpinLock is a reasonable choice.  It has minimal
- *  overhead, and comparable contention performance when the number of
- *  competing threads is less than or equal to the number of logical
- *  CPUs.  Even as the number of threads gets larger, RWSpinLock can
- *  still be very competitive in READ, although it is slower on WRITE,
- *  and also inherently unfair to writers.
+ *  For a spinlock, RWSpinLock is a reasonable choice.  (See the note
+ *  above for why a spin lock is frequently a bad idea generally.)
+ *  RWSpinLock has minimal overhead, and comparable contention
+ *  performance when the number of competing threads is less than or
+ *  equal to the number of logical CPUs.  Even as the number of
+ *  threads gets larger, RWSpinLock can still be very competitive in
+ *  READ, although it is slower on WRITE, and also inherently unfair
+ *  to writers.
  *
  *  RWTicketSpinLock shows more balanced READ/WRITE performance.  If
  *  your application really needs a lot more threads, and a
@@ -46,13 +64,24 @@
  *    RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
  *    readers and writers.
  *
+ *    RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
+ *    writer priority) is NOT reentrant, even for lock_shared().
+ *
+ *    The lock will not grant any new shared (read) accesses while a thread
+ *    attempting to acquire the lock in write mode is blocked. (That is,
+ *    if the lock is held in shared mode by N threads, and a thread attempts
+ *    to acquire it in write mode, no one else can acquire it in shared mode
+ *    until these N threads release the lock and then the blocked thread
+ *    acquires and releases the exclusive lock.) This also applies for
+ *    attempts to reacquire the lock in shared mode by threads that already
+ *    hold it in shared mode, making the lock non-reentrant.
+ *
  *  RWSpinLock handles 2^30 - 1 concurrent readers.
  *
  * @author Xin Liu
  */
 
-#ifndef FOLLY_RWSPINLOCK_H_
-#define FOLLY_RWSPINLOCK_H_
+#pragma once
 
 /*
 ========================================================================
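To make the per-instance locking claim above concrete, here is a minimal usage sketch (an editorial aside, not part of this diff): an RWSpinLock guarding a small map through the ReadHolder/WriteHolder RAII types defined later in this header. The Stats class and its members are hypothetical, and folly is assumed to be on the include path.

    #include <folly/RWSpinLock.h>

    #include <cstdint>
    #include <map>
    #include <string>

    // Hypothetical example type; illustrates the per-instance pattern only.
    class Stats {
     public:
      uint64_t get(const std::string& key) const {
        folly::RWSpinLock::ReadHolder guard(&lock_);  // shared (read) access
        auto it = counts_.find(key);
        return it == counts_.end() ? 0 : it->second;
      }

      void bump(const std::string& key) {
        folly::RWSpinLock::WriteHolder guard(&lock_);  // exclusive access
        ++counts_[key];
      }

     private:
      mutable folly::RWSpinLock lock_;  // 4 bytes, so one per object is cheap
      std::map<std::string, uint64_t> counts_;
    };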
@@ -106,23 +135,33 @@ pthread_rwlock_t Read 728698 24us 101ns 7.28ms 194us
 */
 
-#if defined(__GNUC__) && (defined(__i386) || defined(__x86_64__) || \
-    defined(ARCH_K8))
+#include <folly/Portability.h>
+#include <folly/portability/Asm.h>
+
+#if defined(__GNUC__) && (defined(__i386) || FOLLY_X64 || defined(ARCH_K8))
 #define RW_SPINLOCK_USE_X86_INTRINSIC_
 #include <x86intrin.h>
+#elif defined(_MSC_VER) && defined(FOLLY_X64)
+#define RW_SPINLOCK_USE_X86_INTRINSIC_
 #else
 #undef RW_SPINLOCK_USE_X86_INTRINSIC_
 #endif
 
+// iOS doesn't define _mm_cvtsi64_si128 and friends
+#if (FOLLY_SSE >= 2) && !FOLLY_MOBILE
+#define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
+#else
+#undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
+#endif
+
+#include <algorithm>
 #include <atomic>
 #include <string>
-#include <algorithm>
-#include <boost/noncopyable.hpp>
+#include <thread>
 
-#include <sched.h>
 #include <glog/logging.h>
-#include "folly/Likely.h"
+#include <folly/Likely.h>
 
 namespace folly {
 
@@ -141,16 +180,21 @@ namespace folly {
  * UpgradeLockable concepts except the TimedLockable related locking/unlocking
  * interfaces.
  */
-class RWSpinLock : boost::noncopyable {
+class RWSpinLock {
   enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };
 
  public:
-  RWSpinLock() : bits_(0) {}
+  constexpr RWSpinLock() : bits_(0) {}
+
+  RWSpinLock(RWSpinLock const&) = delete;
+  RWSpinLock& operator=(RWSpinLock const&) = delete;
 
   // Lockable Concept
   void lock() {
-    int count = 0;
+    uint_fast32_t count = 0;
     while (!LIKELY(try_lock())) {
-      if (++count > 1000) sched_yield();
+      if (++count > 1000) {
+        std::this_thread::yield();
+      }
     }
   }
 
@@ -162,9 +206,11 @@ class RWSpinLock : boost::noncopyable {
 
   // SharedLockable Concept
   void lock_shared() {
-    int count = 0;
+    uint_fast32_t count = 0;
     while (!LIKELY(try_lock_shared())) {
-      if (++count > 1000) sched_yield();
+      if (++count > 1000) {
+        std::this_thread::yield();
+      }
     }
   }
 
@@ -180,9 +226,11 @@ class RWSpinLock : boost::noncopyable {
 
   // UpgradeLockable Concept
   void lock_upgrade() {
-    int count = 0;
+    uint_fast32_t count = 0;
     while (!try_lock_upgrade()) {
-      if (++count > 1000) sched_yield();
+      if (++count > 1000) {
+        std::this_thread::yield();
+      }
     }
   }
 
@@ -194,7 +242,9 @@ class RWSpinLock : boost::noncopyable {
   void unlock_upgrade_and_lock() {
     int64_t count = 0;
     while (!try_unlock_upgrade_and_lock()) {
-      if (++count > 1000) sched_yield();
+      if (++count > 1000) {
+        std::this_thread::yield();
+      }
     }
   }
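The upgrade path touched above (lock_upgrade(), then unlock_upgrade_and_lock()) is easiest to follow end to end. A sketch under the same assumptions as before; maybeRebuild() and the dirty flag are invented for illustration:

    #include <folly/RWSpinLock.h>

    #include <utility>

    // Decide under an upgrade lock; promote to exclusive only when needed.
    // Holding the upgrade lock blocks new readers, and existing readers
    // drain before the promotion in unlock_upgrade_and_lock() completes.
    void maybeRebuild(folly::RWSpinLock& lock, bool& dirty) {
      folly::RWSpinLock::UpgradedHolder upgraded(&lock);
      if (!dirty) {
        return;  // destructor releases the upgrade lock
      }
      // Atomic promotion to write mode via unlock_upgrade_and_lock().
      folly::RWSpinLock::WriteHolder writer(std::move(upgraded));
      dirty = false;
      // ... rebuild state while holding the exclusive lock ...
    }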
@@ -203,11 +253,6 @@ class RWSpinLock : boost::noncopyable {
     bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel);
   }
 
-  void unlock_shared_and_lock_upgrade() {
-    lock_upgrade();
-    unlock_shared();
-  }
-
   // write unlock and upgrade lock atomically
   void unlock_and_lock_upgrade() {
     // need to do it in two steps here -- as the UPGRADED bit might be OR-ed at
@@ -225,12 +270,16 @@
   }
 
   // Try to get reader permission on the lock. This can fail if we
-  // find out someone is a writer.
+  // find out someone is a writer or upgrader.
+  // Setting the UPGRADED bit would allow a writer-to-be to indicate
+  // its intention to write and block any new readers while waiting
+  // for existing readers to finish and release their read locks. This
+  // helps avoid starving writers (promoted from upgraders).
   bool try_lock_shared() {
     // fetch_add is considerably (100%) faster than compare_exchange,
     // so here we are optimizing for the common (lock success) case.
     int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
-    if (UNLIKELY(value & WRITER)) {
+    if (UNLIKELY(value & (WRITER|UPGRADED))) {
       bits_.fetch_add(-READER, std::memory_order_release);
       return false;
     }
@@ -264,27 +313,33 @@ class RWSpinLock : boost::noncopyable {
 
   class ReadHolder {
    public:
-    explicit ReadHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
-      if (lock_) lock_->lock_shared();
+    explicit ReadHolder(RWSpinLock* lock) : lock_(lock) {
+      if (lock_) {
+        lock_->lock_shared();
+      }
     }
 
     explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
       lock_->lock_shared();
     }
 
-    ReadHolder(ReadHolder&& other) : lock_(other.lock_) {
+    ReadHolder(ReadHolder&& other) noexcept : lock_(other.lock_) {
       other.lock_ = nullptr;
     }
 
     // down-grade
     explicit ReadHolder(UpgradedHolder&& upgraded) : lock_(upgraded.lock_) {
       upgraded.lock_ = nullptr;
-      if (lock_) lock_->unlock_upgrade_and_lock_shared();
+      if (lock_) {
+        lock_->unlock_upgrade_and_lock_shared();
+      }
     }
 
     explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) {
       writer.lock_ = nullptr;
-      if (lock_) lock_->unlock_and_lock_shared();
+      if (lock_) {
+        lock_->unlock_and_lock_shared();
+      }
     }
 
     ReadHolder& operator=(ReadHolder&& other) {
@@ -296,13 +351,23 @@ class RWSpinLock : boost::noncopyable {
     ReadHolder(const ReadHolder& other) = delete;
     ReadHolder& operator=(const ReadHolder& other) = delete;
 
-    ~ReadHolder() { if (lock_) lock_->unlock_shared(); }
+    ~ReadHolder() {
+      if (lock_) {
+        lock_->unlock_shared();
+      }
+    }
 
     void reset(RWSpinLock* lock = nullptr) {
-      if (lock == lock_) return;
-      if (lock_) lock_->unlock_shared();
+      if (lock == lock_) {
+        return;
+      }
+      if (lock_) {
+        lock_->unlock_shared();
+      }
       lock_ = lock;
-      if (lock_) lock_->lock_shared();
+      if (lock_) {
+        lock_->lock_shared();
+      }
     }
 
     void swap(ReadHolder* other) {
@@ -317,27 +382,25 @@ class RWSpinLock : boost::noncopyable {
 
   class UpgradedHolder {
    public:
-    explicit UpgradedHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
-      if (lock_) lock_->lock_upgrade();
+    explicit UpgradedHolder(RWSpinLock* lock) : lock_(lock) {
+      if (lock_) {
+        lock_->lock_upgrade();
+      }
     }
 
     explicit UpgradedHolder(RWSpinLock& lock) : lock_(&lock) {
       lock_->lock_upgrade();
     }
 
-    explicit UpgradedHolder(ReadHolder&& reader) {
-      lock_ = reader.lock_;
-      reader.lock_ = nullptr;
-      if (lock_) lock_->unlock_shared_and_lock_upgrade();
-    }
-
     explicit UpgradedHolder(WriteHolder&& writer) {
       lock_ = writer.lock_;
       writer.lock_ = nullptr;
-      if (lock_) lock_->unlock_and_lock_upgrade();
+      if (lock_) {
+        lock_->unlock_and_lock_upgrade();
+      }
     }
 
-    UpgradedHolder(UpgradedHolder&& other) : lock_(other.lock_) {
+    UpgradedHolder(UpgradedHolder&& other) noexcept : lock_(other.lock_) {
       other.lock_ = nullptr;
     }
 
@@ -350,13 +413,23 @@ class RWSpinLock : boost::noncopyable {
     UpgradedHolder(const UpgradedHolder& other) = delete;
     UpgradedHolder& operator =(const UpgradedHolder& other) = delete;
 
-    ~UpgradedHolder() { if (lock_) lock_->unlock_upgrade(); }
+    ~UpgradedHolder() {
+      if (lock_) {
+        lock_->unlock_upgrade();
+      }
+    }
 
     void reset(RWSpinLock* lock = nullptr) {
-      if (lock == lock_) return;
-      if (lock_) lock_->unlock_upgrade();
+      if (lock == lock_) {
+        return;
+      }
+      if (lock_) {
+        lock_->unlock_upgrade();
+      }
       lock_ = lock;
-      if (lock_) lock_->lock_upgrade();
+      if (lock_) {
+        lock_->lock_upgrade();
+      }
     }
 
     void swap(UpgradedHolder* other) {
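For the arithmetic behind try_lock_shared() above, and the "2^30 - 1 concurrent readers" figure in the header comment, a self-contained sketch of the bits_ layout (READER = 4, UPGRADED = 2, WRITER = 1):

    #include <cassert>
    #include <cstdint>

    // bit 0 (WRITER)   - set while a writer holds the lock
    // bit 1 (UPGRADED) - set while an upgrader (writer-to-be) holds it
    // bits 2..31       - reader count in units of READER (4), so a 32-bit
    //                    word accommodates up to 2^30 - 1 readers
    int main() {
      const int32_t WRITER = 1, UPGRADED = 2, READER = 4;
      int32_t bits = 0;
      bits += 3 * READER;                         // three readers: 0b1100
      assert((bits & (WRITER | UPGRADED)) == 0);  // new readers admitted
      bits |= UPGRADED;                           // an upgrader arrives
      assert((bits & (WRITER | UPGRADED)) != 0);  // new readers now refused
      assert(bits >> 2 == 3);                     // reader count undisturbed
      return 0;
    }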
@@ -372,8 +445,10 @@ class RWSpinLock : boost::noncopyable {
 
   class WriteHolder {
    public:
-    explicit WriteHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
-      if (lock_) lock_->lock();
+    explicit WriteHolder(RWSpinLock* lock) : lock_(lock) {
+      if (lock_) {
+        lock_->lock();
+      }
     }
 
     explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) {
@@ -384,10 +459,12 @@ class RWSpinLock : boost::noncopyable {
     explicit WriteHolder(UpgradedHolder&& upgraded) {
       lock_ = upgraded.lock_;
       upgraded.lock_ = nullptr;
-      if (lock_) lock_->unlock_upgrade_and_lock();
+      if (lock_) {
+        lock_->unlock_upgrade_and_lock();
+      }
     }
 
-    WriteHolder(WriteHolder&& other) : lock_(other.lock_) {
+    WriteHolder(WriteHolder&& other) noexcept : lock_(other.lock_) {
       other.lock_ = nullptr;
     }
 
@@ -400,13 +477,23 @@ class RWSpinLock : boost::noncopyable {
     WriteHolder(const WriteHolder& other) = delete;
     WriteHolder& operator =(const WriteHolder& other) = delete;
 
-    ~WriteHolder () { if (lock_) lock_->unlock(); }
+    ~WriteHolder() {
+      if (lock_) {
+        lock_->unlock();
+      }
+    }
 
     void reset(RWSpinLock* lock = nullptr) {
-      if (lock == lock_) return;
-      if (lock_) lock_->unlock();
+      if (lock == lock_) {
+        return;
+      }
+      if (lock_) {
+        lock_->unlock();
+      }
       lock_ = lock;
-      if (lock_) lock_->lock();
+      if (lock_) {
+        lock_->lock();
+      }
     }
 
     void swap(WriteHolder* other) {
@@ -420,12 +507,6 @@ class RWSpinLock : boost::noncopyable {
     RWSpinLock* lock_;
   };
 
-  // Synchronized<> adaptors
-  friend void acquireRead(RWSpinLock& l) { return l.lock_shared(); }
-  friend void acquireReadWrite(RWSpinLock& l) { return l.lock(); }
-  friend void releaseRead(RWSpinLock& l) { return l.unlock_shared(); }
-  friend void releaseReadWrite(RWSpinLock& l) { return l.unlock(); }
-
  private:
   std::atomic<int32_t> bits_;
 };
@@ -446,15 +527,16 @@ struct RWTicketIntTrait<64> {
   typedef uint32_t HalfInt;
   typedef uint16_t QuarterInt;
 
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
   static __m128i make128(const uint16_t v[4]) {
-    return _mm_set_epi16(0, 0, 0, 0, v[3], v[2], v[1], v[0]);
+    return _mm_set_epi16(0, 0, 0, 0,
+        short(v[3]), short(v[2]), short(v[1]), short(v[0]));
   }
   static inline __m128i fromInteger(uint64_t from) {
-    return _mm_cvtsi64_si128(from);
+    return _mm_cvtsi64_si128(int64_t(from));
   }
   static inline uint64_t toInteger(__m128i in) {
-    return _mm_cvtsi128_si64(in);
+    return uint64_t(_mm_cvtsi128_si64(in));
   }
   static inline uint64_t addParallel(__m128i in, __m128i kDelta) {
     return toInteger(_mm_add_epi16(in, kDelta));
@@ -468,27 +550,29 @@ struct RWTicketIntTrait<32> {
   typedef uint16_t HalfInt;
   typedef uint8_t QuarterInt;
 
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
   static __m128i make128(const uint8_t v[4]) {
-    return _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, v[3], v[2], v[1], v[0]);
+    return _mm_set_epi8(
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        char(v[3]), char(v[2]), char(v[1]), char(v[0]));
   }
   static inline __m128i fromInteger(uint32_t from) {
-    return _mm_cvtsi32_si128(from);
+    return _mm_cvtsi32_si128(int32_t(from));
   }
   static inline uint32_t toInteger(__m128i in) {
-    return _mm_cvtsi128_si32(in);
+    return uint32_t(_mm_cvtsi128_si32(in));
   }
   static inline uint32_t addParallel(__m128i in, __m128i kDelta) {
     return toInteger(_mm_add_epi8(in, kDelta));
   }
 #endif
 };
-}  // detail
-
+}  // namespace detail
 
-template <size_t kBitWidth, bool kFavorWriter=false>
-class RWTicketSpinLockT : boost::noncopyable {
+template <size_t kBitWidth, bool kFavorWriter = false>
+class RWTicketSpinLockT {
   typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType;
   typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt;
   typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt;
@@ -496,6 +580,7 @@ class RWTicketSpinLockT : boost::noncopyable {
       QuarterInt;
 
   union RWTicket {
+    constexpr RWTicket() : whole(0) {}
     FullInt whole;
     HalfInt readWrite;
     __extension__ struct {
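The RWTicket union above packs three "now serving" counters into one word so that lock and unlock can touch them with a single atomic operation. A standalone sketch of the 32-bit layout (QuarterInt = uint8_t); the Ticket type here is illustrative and, like the header itself, relies on union type-punning and a little-endian target:

    #include <cassert>
    #include <cstdint>

    union Ticket {
      uint32_t whole;      // all three counters, for CAS on the full word
      uint16_t readWrite;  // write+read pair, updated together on release
      struct Parts {
        uint8_t write;     // ticket currently served for writers
        uint8_t read;      // ticket currently served for readers
        uint8_t users;     // next ticket to hand out
      } parts;             // named here; the header uses an anonymous struct
    };

    int main() {
      Ticket t;
      t.whole = 0;
      // A writer draws ticket 0 and may enter because write == its ticket.
      uint8_t my = t.parts.users++;
      assert(my == t.parts.write);
      // Writer release advances both serving counters (cf. the kDeltaBuf
      // "write/read/user" comment with {1, 1, 0} in unlock() below).
      ++t.parts.read;
      ++t.parts.write;
      assert(t.parts.users == t.parts.read && t.parts.users == t.parts.write);
      return 0;
    }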
@@ -506,23 +591,24 @@ class RWTicketSpinLockT : boost::noncopyable {
   } ticket;
 
  private: // Some x64-specific utilities for atomic access to ticket.
-  template<class T> static T load_acquire(T* addr) {
+  template <class T> static T load_acquire(T* addr) {
     T t = *addr; // acquire barrier
-    asm volatile("" : : : "memory");
+    asm_volatile_memory();
     return t;
   }
 
-  template<class T>
+  template <class T>
   static void store_release(T* addr, T v) {
-    asm volatile("" : : : "memory");
+    asm_volatile_memory();
     *addr = v; // release barrier
   }
 
  public:
-  RWTicketSpinLockT() {
-    store_release(&ticket.whole, FullInt(0));
-  }
+  constexpr RWTicketSpinLockT() {}
+
+  RWTicketSpinLockT(RWTicketSpinLockT const&) = delete;
+  RWTicketSpinLockT& operator=(RWTicketSpinLockT const&) = delete;
 
   void lock() {
     if (kFavorWriter) {
@@ -552,7 +638,9 @@ class RWTicketSpinLockT : boost::noncopyable {
   bool try_lock() {
     RWTicket t;
     FullInt old = t.whole = load_acquire(&ticket.whole);
-    if (t.users != t.write) return false;
+    if (t.users != t.write) {
+      return false;
+    }
     ++t.users;
     return __sync_bool_compare_and_swap(&ticket.whole, old, t.whole);
   }
@@ -564,9 +652,18 @@ class RWTicketSpinLockT : boost::noncopyable {
    * turns.
    */
   void writeLockAggressive() {
+    // std::this_thread::yield() is needed here to avoid a pathology if the
+    // number of threads attempting concurrent writes is >= the number of real
+    // cores allocated to this process. This is less likely than the
+    // corresponding situation in lock_shared(), but we still want to
+    // avoid it.
+    uint_fast32_t count = 0;
     QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
     while (val != load_acquire(&ticket.write)) {
-      asm volatile("pause");
+      asm_volatile_pause();
+      if (UNLIKELY(++count > 1000)) {
+        std::this_thread::yield();
+      }
     }
   }
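The pause-then-yield loop added to writeLockAggressive() above is an instance of a general idiom. A generic sketch, not folly code (spinThenYield and CPU_PAUSE are invented names): spin with a CPU pause, and once spinning stops making progress, cede the timeslice so the thread being served can actually run.

    #include <cstdint>
    #include <thread>

    #if defined(__i386__) || defined(__x86_64__)
    #include <immintrin.h>
    #define CPU_PAUSE() _mm_pause()  // be polite to the sibling hyperthread
    #else
    #define CPU_PAUSE() do {} while (0)
    #endif

    template <class TryAcquire>
    void spinThenYield(TryAcquire tryAcquire) {
      uint_fast32_t count = 0;
      while (!tryAcquire()) {
        CPU_PAUSE();
        if (++count > 1000) {
          // Likely more runnable threads than cores; stop burning the
          // timeslice and let the lock (or ticket) holder run.
          std::this_thread::yield();
        }
      }
    }

lock_shared() below applies the same idea but yields only on every 1024th iteration ((++count & 1023) == 0), keeping the common short-spin path cheap.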
@@ -578,6 +675,9 @@ class RWTicketSpinLockT : boost::noncopyable {
     // writers, so the writer has less chance to get the lock when
     // there are a lot of competing readers.  The aggressive spinning
     // can help to avoid starving writers.
+    //
+    // We don't worry about std::this_thread::yield() here because the caller
+    // has already explicitly abandoned fairness.
     while (!try_lock()) {}
   }
 
@@ -592,7 +692,7 @@ class RWTicketSpinLockT : boost::noncopyable {
     t.whole = load_acquire(&ticket.whole);
     FullInt old = t.whole;
 
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
     // SSE2 can reduce the lock and unlock overhead by 10%
     static const QuarterInt kDeltaBuf[4] = { 1, 1, 0, 0 };   // write/read/user
     static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
@@ -606,8 +706,15 @@ class RWTicketSpinLockT : boost::noncopyable {
   }
 
   void lock_shared() {
+    // std::this_thread::yield() is important here because we can't grab the
+    // shared lock if there is a pending writeLockAggressive, so we
+    // need to let threads that already have a shared lock complete.
+    uint_fast32_t count = 0;
     while (!LIKELY(try_lock_shared())) {
-      asm volatile("pause");
+      asm_volatile_pause();
+      if (UNLIKELY((++count & 1023) == 0)) {
+        std::this_thread::yield();
+      }
     }
   }
 
@@ -615,7 +722,7 @@ class RWTicketSpinLockT : boost::noncopyable {
     RWTicket t, old;
     old.whole = t.whole = load_acquire(&ticket.whole);
     old.users = old.read;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
     // SSE2 may reduce the total lock and unlock overhead by 10%
     static const QuarterInt kDeltaBuf[4] = { 0, 1, 1, 0 };   // write/read/user
     static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
@@ -629,21 +736,27 @@ class RWTicketSpinLockT : boost::noncopyable {
   }
 
   void unlock_shared() {
-    QuarterInt val = __sync_fetch_and_add(&ticket.write, 1);
+    __sync_fetch_and_add(&ticket.write, 1);
   }
 
   class WriteHolder;
 
   typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock;
 
-  class ReadHolder : boost::noncopyable {
+  class ReadHolder {
    public:
-    explicit ReadHolder(RWSpinLock *lock = nullptr) :
-      lock_(lock) {
-      if (lock_) lock_->lock_shared();
+    ReadHolder(ReadHolder const&) = delete;
+    ReadHolder& operator=(ReadHolder const&) = delete;
+
+    explicit ReadHolder(RWSpinLock* lock) : lock_(lock) {
+      if (lock_) {
+        lock_->lock_shared();
+      }
     }
 
     explicit ReadHolder(RWSpinLock &lock) : lock_ (&lock) {
-      if (lock_) lock_->lock_shared();
+      if (lock_) {
+        lock_->lock_shared();
+      }
     }
 
     // atomically unlock the write-lock from writer and acquire the read-lock
@@ -655,13 +768,19 @@ class RWTicketSpinLockT : boost::noncopyable {
     }
 
     ~ReadHolder() {
-      if (lock_) lock_->unlock_shared();
+      if (lock_) {
+        lock_->unlock_shared();
+      }
     }
 
     void reset(RWSpinLock *lock = nullptr) {
-      if (lock_) lock_->unlock_shared();
+      if (lock_) {
+        lock_->unlock_shared();
+      }
       lock_ = lock;
-      if (lock_) lock_->lock_shared();
+      if (lock_) {
+        lock_->lock_shared();
+      }
     }
 
     void swap(ReadHolder *other) {
@@ -672,24 +791,39 @@ class RWTicketSpinLockT : boost::noncopyable {
     RWSpinLock *lock_;
   };
 
-  class WriteHolder : boost::noncopyable {
+  class WriteHolder {
    public:
-    explicit WriteHolder(RWSpinLock *lock = nullptr) : lock_(lock) {
-      if (lock_) lock_->lock();
+    WriteHolder(WriteHolder const&) = delete;
+    WriteHolder& operator=(WriteHolder const&) = delete;
+
+    explicit WriteHolder(RWSpinLock* lock) : lock_(lock) {
+      if (lock_) {
+        lock_->lock();
+      }
    }
 
     explicit WriteHolder(RWSpinLock &lock) : lock_ (&lock) {
-      if (lock_) lock_->lock();
+      if (lock_) {
+        lock_->lock();
+      }
    }
 
    ~WriteHolder() {
-      if (lock_) lock_->unlock();
+      if (lock_) {
+        lock_->unlock();
+      }
    }
 
    void reset(RWSpinLock *lock = nullptr) {
-      if (lock == lock_) return;
-      if (lock_) lock_->unlock();
+      if (lock == lock_) {
+        return;
+      }
+      if (lock_) {
+        lock_->unlock();
+      }
      lock_ = lock;
-      if (lock_) lock_->lock();
+      if (lock_) {
+        lock_->lock();
+      }
    }
 
    void swap(WriteHolder *other) {
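A minimal usage sketch for the ticket variant (illustrative, and x86-only, since this whole class sits behind RW_SPINLOCK_USE_X86_INTRINSIC_), using the holders above and the RWTicketSpinLock64 typedef that follows; sharedValue is invented for the example:

    #include <folly/RWSpinLock.h>

    folly::RWTicketSpinLock64 ticketLock;  // RWTicketSpinLockT<64>
    long sharedValue = 0;                  // protected by ticketLock

    void reader() {
      folly::RWTicketSpinLock64::ReadHolder guard(&ticketLock);
      (void)sharedValue;  // ... read-only section ...
    }

    void writer() {
      folly::RWTicketSpinLock64::WriteHolder guard(&ticketLock);
      ++sharedValue;  // ... exclusive section ...
    }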
@@ -700,25 +834,6 @@ class RWTicketSpinLockT : boost::noncopyable {
     friend class ReadHolder;
     RWSpinLock *lock_;
   };
-
-  // Synchronized<> adaptors.
-  friend void acquireRead(RWTicketSpinLockT& mutex) {
-    mutex.lock_shared();
-  }
-  friend void acquireReadWrite(RWTicketSpinLockT& mutex) {
-    mutex.lock();
-  }
-  friend bool acquireReadWrite(RWTicketSpinLockT& mutex,
-                               unsigned int milliseconds) {
-    mutex.lock();
-    return true;
-  }
-  friend void releaseRead(RWTicketSpinLockT& mutex) {
-    mutex.unlock_shared();
-  }
-  friend void releaseReadWrite(RWTicketSpinLockT& mutex) {
-    mutex.unlock();
-  }
 };
 
 typedef RWTicketSpinLockT<32> RWTicketSpinLock32;
@@ -726,10 +841,8 @@ typedef RWTicketSpinLockT<64> RWTicketSpinLock64;
 
 #endif  // RW_SPINLOCK_USE_X86_INTRINSIC_
 
-}  // namespace folly
+} // namespace folly
 
 #ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
 #undef RW_SPINLOCK_USE_X86_INTRINSIC_
 #endif
-
-#endif  // FOLLY_RWSPINLOCK_H_
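Tying back to the N.B. at the top of the header: in most userspace code the recommended replacement is folly::SharedMutex, which drops into the standard lock wrappers. A sketch, assuming folly/SharedMutex.h is available:

    #include <folly/SharedMutex.h>

    #include <mutex>
    #include <shared_mutex>

    folly::SharedMutex mutex;

    void reader() {
      std::shared_lock<folly::SharedMutex> guard(mutex);  // shared
      // ... read-only section; waiters sleep instead of spinning ...
    }

    void writer() {
      std::unique_lock<folly::SharedMutex> guard(mutex);  // exclusive
      // ... exclusive section ...
    }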