Let Futex import base-class ctors

[folly.git] / folly / SharedMutex.h
diff --git a/folly/SharedMutex.h b/folly/SharedMutex.h

index 4ccdff2a0a07162b711a67a806533bc1ee6aef52..976c6e09f80f9918aa1f599167aea16a00ab6f52 100644 (file)
--- a/folly/SharedMutex.h
+++ b/folly/SharedMutex.h
@@ -1,5 +1,5 @@
  /*
- * Copyright 2016 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -19,11 +19,13 @@
  #pragma once
  
  #include <stdint.h>
+
  #include <atomic>
  #include <thread>
  #include <type_traits>
+
  #include <folly/Likely.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/concurrency/CacheLocality.h>
  #include <folly/detail/Futex.h>
  #include <folly/portability/Asm.h>
  #include <folly/portability/SysResource.h>
@@ -204,11 +206,11 @@
  //
  // If you have observed by profiling that your SharedMutex-s are getting
  // cache misses on deferredReaders[] due to another SharedMutex user, then
-// you can use the tag type plus the RWDEFERREDLOCK_DECLARE_STATIC_STORAGE
-// macro to create your own instantiation of the type.  The contention
-// threshold (see kNumSharedToStartDeferring) should make this unnecessary
-// in all but the most extreme cases.  Make sure to check that the
-// increased icache and dcache footprint of the tagged result is worth it.
+// you can use the tag type to create your own instantiation of the type.
+// The contention threshold (see kNumSharedToStartDeferring) should make
+// this unnecessary in all but the most extreme cases.  Make sure to check
+// that the increased icache and dcache footprint of the tagged result is
+// worth it.
  
  // SharedMutex's use of thread local storage is as an optimization, so
  // for the case where thread local storage is not supported, define it
@@ -234,10 +236,11 @@ struct SharedMutexToken {
    uint16_t slot_;
  };
  
-template <bool ReaderPriority,
-          typename Tag_ = void,
-          template <typename> class Atom = std::atomic,
-          bool BlockImmediately = false>
+template <
+    bool ReaderPriority,
+    typename Tag_ = void,
+    template <typename> class Atom = std::atomic,
+    bool BlockImmediately = false>
  class SharedMutexImpl {
   public:
    static constexpr bool kReaderPriority = ReaderPriority;
@@ -249,7 +252,7 @@ class SharedMutexImpl {
    class UpgradeHolder;
    class WriteHolder;
  
-  constexpr SharedMutexImpl() : state_(0) {}
+  constexpr SharedMutexImpl() noexcept : state_(0) {}
  
    SharedMutexImpl(const SharedMutexImpl&) = delete;
    SharedMutexImpl(SharedMutexImpl&&) = delete;
@@ -560,7 +563,7 @@ class SharedMutexImpl {
    };
  
    // 32 bits of state
-  Futex state_;
+  Futex state_{};
  
    // S count needs to be on the end, because we explicitly allow it to
    // underflow.  This can occur while we are in the middle of applying
@@ -723,6 +726,10 @@ class SharedMutexImpl {
    // This is the starting location for Token-less unlock_shared().
    static FOLLY_SHAREDMUTEX_TLS uint32_t tls_lastTokenlessSlot;
  
+  // Index of the last empty deferred reader slot found; tried first next time.
+  static FOLLY_SHAREDMUTEX_TLS uint32_t tls_lastDeferredReaderSlot;
+
+
    // Only indexes divisible by kDeferredSeparationFactor are used.
    // If any of those elements points to a SharedMutexImpl, then it
    // should be considered that there is a shared lock on that instance.
@@ -731,9 +738,8 @@ class SharedMutexImpl {
    typedef Atom<uintptr_t> DeferredReaderSlot;
  
   private:
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING static DeferredReaderSlot deferredReaders
-      [kMaxDeferredReaders *
-       kDeferredSeparationFactor];
+  alignas(hardware_destructive_interference_size) static DeferredReaderSlot
+      deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor];
  
    // Performs an exclusive lock, waiting for state_ & waitMask to be
    // zero first
@@ -761,7 +767,7 @@ class SharedMutexImpl {
        }
  
        uint32_t after = (state & kMayDefer) == 0 ? 0 : kPrevDefer;
-      if (!ReaderPriority || (state & (kMayDefer | kHasS)) == 0) {
+      if (!kReaderPriority || (state & (kMayDefer | kHasS)) == 0) {
          // Block readers immediately, either because we are in write
          // priority mode or because we can acquire the lock in one
          // step.  Note that if state has kHasU, then we are doing an
@@ -802,7 +808,7 @@ class SharedMutexImpl {
              return false;
            }
  
-          if (ReaderPriority && (state & kHasE) == 0) {
+          if (kReaderPriority && (state & kHasE) == 0) {
              assert((state & kBegunE) != 0);
              if (!state_.compare_exchange_strong(state,
                                                  (state & ~kBegunE) | kHasE)) {
@@ -843,6 +849,7 @@ class SharedMutexImpl {
                              WaitContext& ctx) {
  #ifdef RUSAGE_THREAD
      struct rusage usage;
+    std::memset(&usage, 0, sizeof(usage));
      long before = -1;
  #endif
      for (uint32_t yieldCount = 0; yieldCount < kMaxSoftYieldCount;
@@ -985,7 +992,7 @@ class SharedMutexImpl {
            return;
          }
        }
-      asm_pause();
+      asm_volatile_pause();
        if (UNLIKELY(++spinCount >= kMaxSpinCount)) {
          applyDeferredReaders(state, ctx, slot);
          return;
@@ -998,6 +1005,7 @@ class SharedMutexImpl {
  
  #ifdef RUSAGE_THREAD
      struct rusage usage;
+    std::memset(&usage, 0, sizeof(usage));
      long before = -1;
  #endif
      for (uint32_t yieldCount = 0; yieldCount < kMaxSoftYieldCount;
@@ -1129,10 +1137,15 @@ class SharedMutexImpl {
  
   public:
    class ReadHolder {
-   public:
      ReadHolder() : lock_(nullptr) {}
  
-    explicit ReadHolder(const SharedMutexImpl* lock) : ReadHolder(*lock) {}
+   public:
+    explicit ReadHolder(const SharedMutexImpl* lock)
+        : lock_(const_cast<SharedMutexImpl*>(lock)) {
+      if (lock_) { // a null pointer is accepted: nothing is locked
+        lock_->lock_shared(token_);
+      }
+    }
  
      explicit ReadHolder(const SharedMutexImpl& lock)
          : lock_(const_cast<SharedMutexImpl*>(&lock)) {
@@ -1186,10 +1199,14 @@ class SharedMutexImpl {
    };
  
    class UpgradeHolder {
-   public:
      UpgradeHolder() : lock_(nullptr) {}
  
-    explicit UpgradeHolder(SharedMutexImpl* lock) : UpgradeHolder(*lock) {}
+   public:
+    explicit UpgradeHolder(SharedMutexImpl* lock) : lock_(lock) {
+      if (lock_) { // a null pointer is accepted: nothing is locked
+        lock_->lock_upgrade();
+      }
+    }
  
      explicit UpgradeHolder(SharedMutexImpl& lock) : lock_(&lock) {
        lock_->lock_upgrade();
@@ -1232,10 +1249,14 @@ class SharedMutexImpl {
    };
  
    class WriteHolder {
-   public:
      WriteHolder() : lock_(nullptr) {}
  
-    explicit WriteHolder(SharedMutexImpl* lock) : WriteHolder(*lock) {}
+   public:
+    explicit WriteHolder(SharedMutexImpl* lock) : lock_(lock) {
+      if (lock_) { // a null pointer is accepted: nothing is locked
+        lock_->lock();
+      }
+    }
  
      explicit WriteHolder(SharedMutexImpl& lock) : lock_(&lock) {
        lock_->lock();
@@ -1328,11 +1349,11 @@ template <
      typename Tag_,
      template <typename> class Atom,
      bool BlockImmediately>
-typename SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
-    DeferredReaderSlot
-        SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
-            deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor] =
-                {};
+alignas(hardware_destructive_interference_size)
+    typename SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+        DeferredReaderSlot
+    SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+        deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor] = {};
  
  template <
      bool ReaderPriority,
@@ -1343,6 +1364,15 @@ FOLLY_SHAREDMUTEX_TLS uint32_t
      SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
          tls_lastTokenlessSlot = 0;
  
+template <
+    bool ReaderPriority,
+    typename Tag_,
+    template <typename> class Atom,
+    bool BlockImmediately>
+FOLLY_SHAREDMUTEX_TLS uint32_t
+    SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+        tls_lastDeferredReaderSlot = 0;
+
  template <
      bool ReaderPriority,
      typename Tag_,
@@ -1377,7 +1407,7 @@ bool SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
        return false;
      }
  
-    uint32_t slot;
+    uint32_t slot = tls_lastDeferredReaderSlot;
      uintptr_t slotValue = 1; // any non-zero value will do
  
      bool canAlreadyDefer = (state & kMayDefer) != 0;
@@ -1385,21 +1415,25 @@ bool SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
          (state & kHasS) >= (kNumSharedToStartDeferring - 1) * kIncrHasS;
      bool drainInProgress = ReaderPriority && (state & kBegunE) != 0;
      if (canAlreadyDefer || (aboveDeferThreshold && !drainInProgress)) {
-      // starting point for our empty-slot search, can change after
-      // calling waitForZeroBits
-      uint32_t bestSlot =
-          (uint32_t)folly::detail::AccessSpreader<Atom>::current(
-              kMaxDeferredReaders);
-
-      // deferred readers are already enabled, or it is time to
-      // enable them if we can find a slot
-      for (uint32_t i = 0; i < kDeferredSearchDistance; ++i) {
-        slot = bestSlot ^ i;
-        assert(slot < kMaxDeferredReaders);
-        slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
-        if (slotValue == 0) {
-          // found empty slot
-          break;
+      // Try the slot recorded in tls_lastDeferredReaderSlot first.
+      slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
+      if (slotValue != 0) {
+        // starting point for our empty-slot search, can change after
+        // calling waitForZeroBits
+        uint32_t bestSlot =
+            (uint32_t)folly::AccessSpreader<Atom>::current(kMaxDeferredReaders);
+
+        // deferred readers are already enabled, or it is time to
+        // enable them if we can find a slot
+        for (uint32_t i = 0; i < kDeferredSearchDistance; ++i) {
+          slot = bestSlot ^ i;
+          assert(slot < kMaxDeferredReaders);
+          slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
+          if (slotValue == 0) {
+            // found empty slot
+            tls_lastDeferredReaderSlot = slot;
+            break;
+          }
          }
        }
      }