/*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
/*
+ * N.B. You most likely do _not_ want to use RWSpinLock or any other
+ * kind of spinlock. Use SharedMutex instead.
+ *
+ * In short, spinlocks in preemptive multi-tasking operating systems
+ * have serious problems and fast mutexes like SharedMutex are almost
+ * certainly the better choice, because letting the OS scheduler put a
+ * thread to sleep is better for system responsiveness and throughput
+ * than wasting a timeslice repeatedly querying a lock held by a
+ * thread that's blocked, and you can't prevent userspace
+ * programs from blocking.
+ *
+ * Spinlocks in an operating system kernel make much more sense than
+ * they do in userspace.
+ *
+ * -------------------------------------------------------------------
+ *
* Two Read-Write spin lock implementations.
*
* Ref: http://locklessinc.com/articles/locks
* are very compact (4/8 bytes), so are suitable for per-instance
* based locking, particularly when contention is not expected.
*
- * In most cases, RWSpinLock is a reasonable choice. It has minimal
- * overhead, and comparable contention performance when the number of
- * competing threads is less than or equal to the number of logical
- * CPUs. Even as the number of threads gets larger, RWSpinLock can
- * still be very competitive in READ, although it is slower on WRITE,
- * and also inherently unfair to writers.
+ * For a spinlock, RWSpinLock is a reasonable choice. (See the note
+ * above for why a spin lock is frequently a bad idea generally.)
+ * RWSpinLock has minimal overhead, and comparable contention
+ * performance when the number of competing threads is less than or
+ * equal to the number of logical CPUs. Even as the number of
+ * threads gets larger, RWSpinLock can still be very competitive in
+ * READ, although it is slower on WRITE, and also inherently unfair
+ * to writers.
*
* RWTicketSpinLock shows more balanced READ/WRITE performance. If
* your application really needs a lot more threads, and a
* RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
* readers and writers.
*
+ * RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
+ * writer priority) is NOT reentrant, even for lock_shared().
+ *
+ * The lock will not grant any new shared (read) accesses while a thread
+ * attempting to acquire the lock in write mode is blocked. (That is,
+ * if the lock is held in shared mode by N threads, and a thread attempts
+ * to acquire it in write mode, no one else can acquire it in shared mode
+ * until these N threads release the lock and then the blocked thread
+ * acquires and releases the exclusive lock.) This also applies to
+ * attempts to reacquire the lock in shared mode by threads that already
+ * hold it in shared mode, making the lock non-reentrant.
+ *
* RWSpinLock handles 2^30 - 1 concurrent readers.
*
* @author Xin Liu <xliux@fb.com>
*/
-#ifndef FOLLY_RWSPINLOCK_H_
-#define FOLLY_RWSPINLOCK_H_
+#pragma once
/*
========================================================================
*/
#include <folly/Portability.h>
+#include <folly/portability/Asm.h>
#if defined(__GNUC__) && \
(defined(__i386) || FOLLY_X64 || \
defined(ARCH_K8))
-#define RW_SPINLOCK_USE_X86_INTRINSIC_
-#include <x86intrin.h>
+# define RW_SPINLOCK_USE_X86_INTRINSIC_
+# include <x86intrin.h>
+#elif defined(_MSC_VER) && defined(FOLLY_X64)
+# define RW_SPINLOCK_USE_X86_INTRINSIC_
#else
-#undef RW_SPINLOCK_USE_X86_INTRINSIC_
+# undef RW_SPINLOCK_USE_X86_INTRINSIC_
+#endif
+
+// iOS doesn't define _mm_cvtsi64_si128 and friends
+#if (FOLLY_SSE >= 2) && !FOLLY_MOBILE
+#define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
+#else
+#undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#endif
#include <atomic>
#include <string>
#include <algorithm>
-#include <boost/noncopyable.hpp>
#include <sched.h>
#include <glog/logging.h>
* UpgradeLockable concepts except the TimedLockable related locking/unlocking
* interfaces.
*/
-class RWSpinLock : boost::noncopyable {
+class RWSpinLock {
enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };
public:
- RWSpinLock() : bits_(0) {}
+ constexpr RWSpinLock() : bits_(0) {}
+
+ RWSpinLock(RWSpinLock const&) = delete;
+ RWSpinLock& operator=(RWSpinLock const&) = delete;
// Lockable Concept
void lock() {
RWSpinLock* lock_;
};
- // Synchronized<> adaptors
- friend void acquireRead(RWSpinLock& l) { return l.lock_shared(); }
- friend void acquireReadWrite(RWSpinLock& l) { return l.lock(); }
- friend void releaseRead(RWSpinLock& l) { return l.unlock_shared(); }
- friend void releaseReadWrite(RWSpinLock& l) { return l.unlock(); }
-
private:
std::atomic<int32_t> bits_;
};
typedef uint32_t HalfInt;
typedef uint16_t QuarterInt;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
static __m128i make128(const uint16_t v[4]) {
return _mm_set_epi16(0, 0, 0, 0, v[3], v[2], v[1], v[0]);
}
typedef uint16_t HalfInt;
typedef uint8_t QuarterInt;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
static __m128i make128(const uint8_t v[4]) {
return _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, v[3], v[2], v[1], v[0]);
template<size_t kBitWidth, bool kFavorWriter=false>
-class RWTicketSpinLockT : boost::noncopyable {
+class RWTicketSpinLockT {
typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType;
typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt;
typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt;
QuarterInt;
union RWTicket {
+ constexpr RWTicket() : whole(0) {}
FullInt whole;
HalfInt readWrite;
__extension__ struct {
private: // Some x64-specific utilities for atomic access to ticket.
template<class T> static T load_acquire(T* addr) {
T t = *addr; // acquire barrier
- asm volatile("" : : : "memory");
+ asm_volatile_memory();
return t;
}
template<class T>
static void store_release(T* addr, T v) {
- asm volatile("" : : : "memory");
+ asm_volatile_memory();
*addr = v; // release barrier
}
public:
- RWTicketSpinLockT() {
- store_release(&ticket.whole, FullInt(0));
- }
+ constexpr RWTicketSpinLockT() {}
+
+ RWTicketSpinLockT(RWTicketSpinLockT const&) = delete;
+ RWTicketSpinLockT& operator=(RWTicketSpinLockT const&) = delete;
void lock() {
if (kFavorWriter) {
int count = 0;
QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
while (val != load_acquire(&ticket.write)) {
- asm volatile("pause");
+ asm_volatile_pause();
if (UNLIKELY(++count > 1000)) sched_yield();
}
}
t.whole = load_acquire(&ticket.whole);
FullInt old = t.whole;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
// SSE2 can reduce the lock and unlock overhead by 10%
static const QuarterInt kDeltaBuf[4] = { 1, 1, 0, 0 }; // write/read/user
static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
// need to let threads that already have a shared lock complete
int count = 0;
while (!LIKELY(try_lock_shared())) {
- asm volatile("pause");
+ asm_volatile_pause();
if (UNLIKELY((++count & 1023) == 0)) sched_yield();
}
}
RWTicket t, old;
old.whole = t.whole = load_acquire(&ticket.whole);
old.users = old.read;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
// SSE2 may reduce the total lock and unlock overhead by 10%
static const QuarterInt kDeltaBuf[4] = { 0, 1, 1, 0 }; // write/read/user
static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
class WriteHolder;
typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock;
- class ReadHolder : boost::noncopyable {
+ class ReadHolder {
public:
+ ReadHolder(ReadHolder const&) = delete;
+ ReadHolder& operator=(ReadHolder const&) = delete;
+
explicit ReadHolder(RWSpinLock *lock = nullptr) :
lock_(lock) {
if (lock_) lock_->lock_shared();
RWSpinLock *lock_;
};
- class WriteHolder : boost::noncopyable {
+ class WriteHolder {
public:
+ WriteHolder(WriteHolder const&) = delete;
+ WriteHolder& operator=(WriteHolder const&) = delete;
+
explicit WriteHolder(RWSpinLock *lock = nullptr) : lock_(lock) {
if (lock_) lock_->lock();
}
friend class ReadHolder;
RWSpinLock *lock_;
};
-
- // Synchronized<> adaptors.
- friend void acquireRead(RWTicketSpinLockT& mutex) {
- mutex.lock_shared();
- }
- friend void acquireReadWrite(RWTicketSpinLockT& mutex) {
- mutex.lock();
- }
- friend bool acquireReadWrite(RWTicketSpinLockT& mutex,
- unsigned int milliseconds) {
- mutex.lock();
- return true;
- }
- friend void releaseRead(RWTicketSpinLockT& mutex) {
- mutex.unlock_shared();
- }
- friend void releaseReadWrite(RWTicketSpinLockT& mutex) {
- mutex.unlock();
- }
};
typedef RWTicketSpinLockT<32> RWTicketSpinLock32;
#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
#undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif
-
-#endif // FOLLY_RWSPINLOCK_H_