/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
* readers and writers.
*
+ * RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
+ * writer priority) is NOT reentrant, even for lock_shared().
+ *
+ * The lock will not grant any new shared (read) accesses while a thread
+ * attempting to acquire the lock in write mode is blocked. (That is,
+ * if the lock is held in shared mode by N threads, and a thread attempts
+ * to acquire it in write mode, no one else can acquire it in shared mode
+ * until these N threads release the lock and then the blocked thread
+ * acquires and releases the exclusive lock.) This also applies for
+ * attempts to reacquire the lock in shared mode by threads that already
+ * hold it in shared mode, making the lock non-reentrant.
+ *
* RWSpinLock handles 2^30 - 1 concurrent readers.
*
* @author Xin Liu <xliux@fb.com>
*/
-#if defined(__GNUC__) && (defined(__i386) || defined(__x86_64__) || \
-    defined(ARCH_K8))
-#define RW_SPINLOCK_USE_X86_INTRINSIC_
-#include <x86intrin.h>
+#include <folly/Portability.h>
+
+#if defined(__GNUC__) && \
+    (defined(__i386) || FOLLY_X64 || \
+    defined(ARCH_K8))
+# define RW_SPINLOCK_USE_X86_INTRINSIC_
+# include <x86intrin.h>
+// NOTE: FOLLY_X64 is always defined (to 0 or 1) by folly/Portability.h --
+// the GCC branch above already tests it by value -- so it must be tested
+// by value here too. defined(FOLLY_X64) is always true and would wrongly
+// enable the x86 intrinsic path on every MSVC target, including ARM.
+#elif defined(_MSC_VER) && FOLLY_X64
+# define RW_SPINLOCK_USE_X86_INTRINSIC_
#else
-#undef RW_SPINLOCK_USE_X86_INTRINSIC_
+# undef RW_SPINLOCK_USE_X86_INTRINSIC_
+#endif
+
+// iOS doesn't define _mm_cvtsi64_si128 and friends
+#if (FOLLY_SSE >= 2) && !TARGET_OS_IPHONE
+#define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
+#else
+#undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#endif
#include <atomic>
#include <sched.h>
#include <glog/logging.h>
-#include "folly/Likely.h"
+#include <folly/Likely.h>
namespace folly {
bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel);
}
- void unlock_shared_and_lock_upgrade() {
- lock_upgrade();
- unlock_shared();
- }
-
// write unlock and upgrade lock atomically
void unlock_and_lock_upgrade() {
// need to do it in two steps here -- as the UPGRADED bit might be OR-ed at
}
// Try to get reader permission on the lock. This can fail if we
- // find out someone is a writer.
+ // find out someone is a writer or upgrader.
+ // Setting the UPGRADED bit would allow a writer-to-be to indicate
+ // its intention to write and block any new readers while waiting
+ // for existing readers to finish and release their read locks. This
+ // helps avoid starving writers (promoted from upgraders).
bool try_lock_shared() {
// fetch_add is considerably (100%) faster than compare_exchange,
// so here we are optimizing for the common (lock success) case.
int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
- if (UNLIKELY(value & WRITER)) {
+ if (UNLIKELY(value & (WRITER|UPGRADED))) {
bits_.fetch_add(-READER, std::memory_order_release);
return false;
}
lock_->lock_shared();
}
  // Move-construct: transfer the lock pointer and leave the source holder
  // empty, so only the destination's destructor releases the shared lock.
  // noexcept lets standard containers move these holders instead of copying.
-  ReadHolder(ReadHolder&& other) : lock_(other.lock_) {
+  ReadHolder(ReadHolder&& other) noexcept : lock_(other.lock_) {
    other.lock_ = nullptr;
  }
lock_->lock_upgrade();
}
- explicit UpgradedHolder(ReadHolder&& reader) {
- lock_ = reader.lock_;
- reader.lock_ = nullptr;
- if (lock_) lock_->unlock_shared_and_lock_upgrade();
- }
-
  // Downgrade from an exclusive (write) holder: take over the writer's lock
  // pointer, empty the writer holder, and atomically convert the exclusive
  // hold into an upgrade hold via unlock_and_lock_upgrade().
  explicit UpgradedHolder(WriteHolder&& writer) {
    lock_ = writer.lock_;
    writer.lock_ = nullptr;
    if (lock_) lock_->unlock_and_lock_upgrade();
  }
  // Move-construct: steal the lock pointer; the source holder is emptied so
  // its destructor becomes a no-op. noexcept enables move-over-copy in
  // standard containers.
-  UpgradedHolder(UpgradedHolder&& other) : lock_(other.lock_) {
+  UpgradedHolder(UpgradedHolder&& other) noexcept : lock_(other.lock_) {
    other.lock_ = nullptr;
  }
if (lock_) lock_->unlock_upgrade_and_lock();
}
  // Move-construct: steal the lock pointer; the source holder is emptied so
  // its destructor becomes a no-op. noexcept enables move-over-copy in
  // standard containers.
-  WriteHolder(WriteHolder&& other) : lock_(other.lock_) {
+  WriteHolder(WriteHolder&& other) noexcept : lock_(other.lock_) {
    other.lock_ = nullptr;
  }
typedef uint32_t HalfInt;
typedef uint16_t QuarterInt;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  // Pack the four 16-bit ticket fields v[0..3] into the low 64 bits of an
  // SSE register (v[0] in the lowest lane, upper lanes zeroed), so a whole
  // ticket delta can be applied in one vector operation.
  static __m128i make128(const uint16_t v[4]) {
    return _mm_set_epi16(0, 0, 0, 0, v[3], v[2], v[1], v[0]);
  }
typedef uint16_t HalfInt;
typedef uint8_t QuarterInt;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  // Pack the four 8-bit ticket fields v[0..3] into the low 32 bits of an
  // SSE register (v[0] in the lowest lane, all other lanes zeroed).
  static __m128i make128(const uint8_t v[4]) {
    return _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, v[3], v[2], v[1], v[0]);
  }
private: // Some x64-specific utilities for atomic access to ticket.
  // Load with acquire semantics. Per the section comment these helpers are
  // x64-specific: a plain load already has acquire ordering on x86/x64, so
  // only a compiler reordering fence is needed (asm_volatile_memory emits
  // no machine instructions, it just stops the compiler from moving later
  // memory accesses above the load).
  template<class T> static T load_acquire(T* addr) {
    T t = *addr; // acquire barrier
-    asm volatile("" : : : "memory");
+    asm_volatile_memory();
    return t;
  }
  // Store with release semantics. Mirror of load_acquire: on x86/x64 a
  // plain store already has release ordering, so a compiler-only fence
  // before the store suffices to keep earlier memory accesses from being
  // reordered past it.
  template<class T>
  static void store_release(T* addr, T v) {
-    asm volatile("" : : : "memory");
+    asm_volatile_memory();
    *addr = v; // release barrier
  }
* turns.
*/
  // Acquire the write lock by immediately taking a ticket (fetch-and-add on
  // ticket.users), then spinning until this thread's ticket number comes up
  // in ticket.write. "Aggressive" because the ticket is claimed up front,
  // ahead of any readers that arrive while we spin.
  void writeLockAggressive() {
+    // sched_yield() is needed here to avoid a pathology if the number
+    // of threads attempting concurrent writes is >= the number of real
+    // cores allocated to this process. This is less likely than the
+    // corresponding situation in lock_shared(), but we still want to
+    // avoid it
+    int count = 0;
    QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
    while (val != load_acquire(&ticket.write)) {
-      asm volatile("pause");
+      asm_volatile_pause();
+      // Spin ~1000 iterations with pause, then start yielding the CPU so
+      // the ticket holder ahead of us can make progress.
+      if (UNLIKELY(++count > 1000)) sched_yield();
    }
  }
// writers, so the writer has less chance to get the lock when
// there are a lot of competing readers. The aggressive spinning
// can help to avoid starving writers.
+ //
+ // We don't worry about sched_yield() here because the caller
+ // has already explicitly abandoned fairness.
while (!try_lock()) {}
}
t.whole = load_acquire(&ticket.whole);
FullInt old = t.whole;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
// SSE2 can reduce the lock and unlock overhead by 10%
static const QuarterInt kDeltaBuf[4] = { 1, 1, 0, 0 }; // write/read/user
static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
}
  // Acquire the lock in shared (reader) mode: spin on try_lock_shared(),
  // pausing between attempts and periodically yielding the CPU.
  void lock_shared() {
+    // sched_yield() is important here because we can't grab the
+    // shared lock if there is a pending writeLockAggressive, so we
+    // need to let threads that already have a shared lock complete
+    int count = 0;
    while (!LIKELY(try_lock_shared())) {
-      asm volatile("pause");
+      asm_volatile_pause();
+      // Yield every 1024 iterations (cheap power-of-two mask test) rather
+      // than on every failed attempt.
+      if (UNLIKELY((++count & 1023) == 0)) sched_yield();
    }
  }
RWTicket t, old;
old.whole = t.whole = load_acquire(&ticket.whole);
old.users = old.read;
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
// SSE2 may reduce the total lock and unlock overhead by 10%
static const QuarterInt kDeltaBuf[4] = { 0, 1, 1, 0 }; // write/read/user
static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);