Added a better check for openssl

[folly.git] / folly / RWSpinLock.h
diff --git a/folly/RWSpinLock.h b/folly/RWSpinLock.h

index 9ba6390ed61d750d8e34483de1422c5c934e5732..eea433cfd0d38374598596f05883873f89f3bfe3 100644 (file)
--- a/folly/RWSpinLock.h
+++ b/folly/RWSpinLock.h
@@ -1,5 +1,5 @@
  /*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2016 Facebook, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -15,6 +15,22 @@
   */
  
  /*
+ * N.B. You most likely do _not_ want to use RWSpinLock or any other
+ * kind of spinlock.  Use SharedMutex instead.
+ *
+ * In short, spinlocks in preemptive multi-tasking operating systems
+ * have serious problems and fast mutexes like SharedMutex are almost
+ * certainly the better choice, because letting the OS scheduler put a
+ * thread to sleep is better for system responsiveness and throughput
+ * than wasting a timeslice repeatedly querying a lock held by a
+ * thread that's blocked, and you can't prevent userspace
+ * programs from blocking.
+ *
+ * Spinlocks in an operating system kernel make much more sense than
+ * they do in userspace.
+ *
+ * -------------------------------------------------------------------
+ *
   * Two Read-Write spin lock implementations.
   *
   *  Ref: http://locklessinc.com/articles/locks
@@ -24,12 +40,14 @@
   *  are very compact (4/8 bytes), so are suitable for per-instance
   *  based locking, particularly when contention is not expected.
   *
- *  In most cases, RWSpinLock is a reasonable choice.  It has minimal
- *  overhead, and comparable contention performance when the number of
- *  competing threads is less than or equal to the number of logical
- *  CPUs.  Even as the number of threads gets larger, RWSpinLock can
- *  still be very competitive in READ, although it is slower on WRITE,
- *  and also inherently unfair to writers.
+ *  For a spinlock, RWSpinLock is a reasonable choice.  (See the note
+ *  above for why a spin lock is frequently a bad idea generally.)
+ *  RWSpinLock has minimal overhead, and comparable contention
+ *  performance when the number of competing threads is less than or
+ *  equal to the number of logical CPUs.  Even as the number of
+ *  threads gets larger, RWSpinLock can still be very competitive in
+ *  READ, although it is slower on WRITE, and also inherently unfair
+ *  to writers.
   *
   *  RWTicketSpinLock shows more balanced READ/WRITE performance.  If
   *  your application really needs a lot more threads, and a
@@ -46,13 +64,24 @@
   *    RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
   *    readers and writers.
   *
+ *    RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
+ *    writer priority) is NOT reentrant, even for lock_shared().
+ *
+ *    The lock will not grant any new shared (read) accesses while a thread
+ *    attempting to acquire the lock in write mode is blocked. (That is,
+ *    if the lock is held in shared mode by N threads, and a thread attempts
+ *    to acquire it in write mode, no one else can acquire it in shared mode
+ *    until these N threads release the lock and then the blocked thread
+ *    acquires and releases the exclusive lock.) This also applies to
+ *    attempts to reacquire the lock in shared mode by threads that already
+ *    hold it in shared mode, making the lock non-reentrant.
+ *
   *    RWSpinLock handles 2^30 - 1 concurrent readers.
   *
   * @author Xin Liu <xliux@fb.com>
   */
  
-#ifndef FOLLY_RWSPINLOCK_H_
-#define FOLLY_RWSPINLOCK_H_
+#pragma once
  
  /*
  ========================================================================
@@ -107,20 +136,29 @@ pthread_rwlock_t Read        728698     24us       101ns     7.28ms     194us
  */
  
  #include <folly/Portability.h>
+#include <folly/portability/Asm.h>
  
  #if defined(__GNUC__) && \
    (defined(__i386) || FOLLY_X64 || \
     defined(ARCH_K8))
-#define RW_SPINLOCK_USE_X86_INTRINSIC_
-#include <x86intrin.h>
+# define RW_SPINLOCK_USE_X86_INTRINSIC_
+# include <x86intrin.h>
+#elif defined(_MSC_VER) && defined(FOLLY_X64)
+# define RW_SPINLOCK_USE_X86_INTRINSIC_
  #else
-#undef RW_SPINLOCK_USE_X86_INTRINSIC_
+# undef RW_SPINLOCK_USE_X86_INTRINSIC_
+#endif
+
+// iOS doesn't define _mm_cvtsi64_si128 and friends
+#if (FOLLY_SSE >= 2) && !FOLLY_MOBILE
+#define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
+#else
+#undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  #endif
  
  #include <atomic>
  #include <string>
  #include <algorithm>
-#include <boost/noncopyable.hpp>
  
  #include <sched.h>
  #include <glog/logging.h>
@@ -144,10 +182,13 @@ namespace folly {
   * UpgradeLockable concepts except the TimedLockable related locking/unlocking
   * interfaces.
   */
-class RWSpinLock : boost::noncopyable {
+class RWSpinLock {
    enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };
   public:
-  RWSpinLock() : bits_(0) {}
+  constexpr RWSpinLock() : bits_(0) {}
+
+  RWSpinLock(RWSpinLock const&) = delete;
+  RWSpinLock& operator=(RWSpinLock const&) = delete;
  
    // Lockable Concept
    void lock() {
@@ -416,12 +457,6 @@ class RWSpinLock : boost::noncopyable {
      RWSpinLock* lock_;
    };
  
-  // Synchronized<> adaptors
-  friend void acquireRead(RWSpinLock& l) { return l.lock_shared(); }
-  friend void acquireReadWrite(RWSpinLock& l) { return l.lock(); }
-  friend void releaseRead(RWSpinLock& l) { return l.unlock_shared(); }
-  friend void releaseReadWrite(RWSpinLock& l) { return l.unlock(); }
-
   private:
    std::atomic<int32_t> bits_;
  };
@@ -442,7 +477,7 @@ struct RWTicketIntTrait<64> {
    typedef uint32_t HalfInt;
    typedef uint16_t QuarterInt;
  
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    static __m128i make128(const uint16_t v[4]) {
      return _mm_set_epi16(0, 0, 0, 0, v[3], v[2], v[1], v[0]);
    }
@@ -464,7 +499,7 @@ struct RWTicketIntTrait<32> {
    typedef uint16_t HalfInt;
    typedef uint8_t QuarterInt;
  
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    static __m128i make128(const uint8_t v[4]) {
      return _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, v[3], v[2], v[1], v[0]);
@@ -484,7 +519,7 @@ struct RWTicketIntTrait<32> {
  
  
  template<size_t kBitWidth, bool kFavorWriter=false>
-class RWTicketSpinLockT : boost::noncopyable {
+class RWTicketSpinLockT {
    typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType;
    typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt;
    typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt;
@@ -492,6 +527,7 @@ class RWTicketSpinLockT : boost::noncopyable {
      QuarterInt;
  
    union RWTicket {
+    constexpr RWTicket() : whole(0) {}
      FullInt whole;
      HalfInt readWrite;
      __extension__ struct {
@@ -504,21 +540,22 @@ class RWTicketSpinLockT : boost::noncopyable {
   private: // Some x64-specific utilities for atomic access to ticket.
    template<class T> static T load_acquire(T* addr) {
      T t = *addr; // acquire barrier
-    asm volatile("" : : : "memory");
+    asm_volatile_memory();
      return t;
    }
  
    template<class T>
    static void store_release(T* addr, T v) {
-    asm volatile("" : : : "memory");
+    asm_volatile_memory();
      *addr = v; // release barrier
    }
  
   public:
  
-  RWTicketSpinLockT() {
-    store_release(&ticket.whole, FullInt(0));
-  }
+  constexpr RWTicketSpinLockT() {}
+
+  RWTicketSpinLockT(RWTicketSpinLockT const&) = delete;
+  RWTicketSpinLockT& operator=(RWTicketSpinLockT const&) = delete;
  
    void lock() {
      if (kFavorWriter) {
@@ -568,7 +605,7 @@ class RWTicketSpinLockT : boost::noncopyable {
      int count = 0;
      QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
      while (val != load_acquire(&ticket.write)) {
-      asm volatile("pause");
+      asm_volatile_pause();
        if (UNLIKELY(++count > 1000)) sched_yield();
      }
    }
@@ -598,7 +635,7 @@ class RWTicketSpinLockT : boost::noncopyable {
      t.whole = load_acquire(&ticket.whole);
      FullInt old = t.whole;
  
-#ifdef __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
      // SSE2 can reduce the lock and unlock overhead by 10%
      static const QuarterInt kDeltaBuf[4] = { 1, 1, 0, 0 };   // write/read/user
      static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
@@ -617,7 +654,7 @@ class RWTicketSpinLockT : boost::noncopyable {
      // need to let threads that already have a shared lock complete
      int count = 0;
      while (!LIKELY(try_lock_shared())) {
-      asm volatile("pause");
+      asm_volatile_pause();
        if (UNLIKELY((++count & 1023) == 0)) sched_yield();
      }
    }
@@ -626,7 +663,7 @@ class RWTicketSpinLockT : boost::noncopyable {
      RWTicket t, old;
      old.whole = t.whole = load_acquire(&ticket.whole);
      old.users = old.read;
-#ifdef  __SSE2__
+#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
      // SSE2 may reduce the total lock and unlock overhead by 10%
      static const QuarterInt kDeltaBuf[4] = { 0, 1, 1, 0 };   // write/read/user
      static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
@@ -646,8 +683,11 @@ class RWTicketSpinLockT : boost::noncopyable {
    class WriteHolder;
  
    typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock;
-  class ReadHolder : boost::noncopyable {
+  class ReadHolder {
     public:
+    ReadHolder(ReadHolder const&) = delete;
+    ReadHolder& operator=(ReadHolder const&) = delete;
+
      explicit ReadHolder(RWSpinLock *lock = nullptr) :
        lock_(lock) {
        if (lock_) lock_->lock_shared();
@@ -683,8 +723,11 @@ class RWTicketSpinLockT : boost::noncopyable {
      RWSpinLock *lock_;
    };
  
-  class WriteHolder : boost::noncopyable {
+  class WriteHolder {
     public:
+    WriteHolder(WriteHolder const&) = delete;
+    WriteHolder& operator=(WriteHolder const&) = delete;
+
      explicit WriteHolder(RWSpinLock *lock = nullptr) : lock_(lock) {
        if (lock_) lock_->lock();
      }
@@ -711,25 +754,6 @@ class RWTicketSpinLockT : boost::noncopyable {
      friend class ReadHolder;
      RWSpinLock *lock_;
    };
-
-  // Synchronized<> adaptors.
-  friend void acquireRead(RWTicketSpinLockT& mutex) {
-    mutex.lock_shared();
-  }
-  friend void acquireReadWrite(RWTicketSpinLockT& mutex) {
-    mutex.lock();
-  }
-  friend bool acquireReadWrite(RWTicketSpinLockT& mutex,
-                               unsigned int milliseconds) {
-    mutex.lock();
-    return true;
-  }
-  friend void releaseRead(RWTicketSpinLockT& mutex) {
-    mutex.unlock_shared();
-  }
-  friend void releaseReadWrite(RWTicketSpinLockT& mutex) {
-    mutex.unlock();
-  }
  };
  
  typedef RWTicketSpinLockT<32> RWTicketSpinLock32;
@@ -742,5 +766,3 @@ typedef RWTicketSpinLockT<64> RWTicketSpinLock64;
  #ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
  #undef RW_SPINLOCK_USE_X86_INTRINSIC_
  #endif
-
-#endif  // FOLLY_RWSPINLOCK_H_