/*
- * Copyright 2015 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_INDEXEDMEMPOOL_H
-#define FOLLY_INDEXEDMEMPOOL_H
+#pragma once
-#include <type_traits>
-#include <stdint.h>
#include <assert.h>
-#include <unistd.h>
-#include <sys/mman.h>
+#include <errno.h>
+#include <stdint.h>
+
+#include <type_traits>
+
#include <boost/noncopyable.hpp>
#include <folly/AtomicStruct.h>
-#include <folly/detail/CacheLocality.h>
+#include <folly/Portability.h>
+#include <folly/concurrency/CacheLocality.h>
+#include <folly/portability/SysMman.h>
+#include <folly/portability/Unistd.h>
// Ignore shadowing warnings within this file, so includers can use -Wshadow.
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
+FOLLY_PUSH_WARNING
+FOLLY_GCC_DISABLE_WARNING("-Wshadow")
namespace folly {
namespace detail {
template <typename Pool>
struct IndexedMemPoolRecycler;
-}
+} // namespace detail
+
+template <
+ typename T,
+ bool EagerRecycleWhenTrivial = false,
+ bool EagerRecycleWhenNotTrivial = true>
+struct IndexedMemPoolTraits {
+ static constexpr bool eagerRecycle() {
+ return std::is_trivial<T>::value ? EagerRecycleWhenTrivial
+ : EagerRecycleWhenNotTrivial;
+ }
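+
+  // With these defaults, eagerRecycle() is false for IndexedMemPoolTraits<int>
+  // (int is trivial) and true for IndexedMemPoolTraits<std::string>.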
+
+ /// Called when the element pointed to by ptr is allocated for the
+ /// first time.
+ static void initialize(T* ptr) {
+ if (!eagerRecycle()) {
+ new (ptr) T();
+ }
+ }
+
+  /// Called when the element pointed to by ptr is freed at pool
+  /// destruction time.
+ static void cleanup(T* ptr) {
+ if (!eagerRecycle()) {
+ ptr->~T();
+ }
+ }
+
+  /// Called when the element is allocated, with the arguments forwarded
+  /// from IndexedMemPool::allocIndex or allocElem.
+ template <typename... Args>
+ static void onAllocate(T* ptr, Args&&... args) {
+ static_assert(
+ sizeof...(Args) == 0 || eagerRecycle(),
+ "emplace-style allocation requires eager recycle, "
+ "which is defaulted only for non-trivial types");
+ if (eagerRecycle()) {
+ new (ptr) T(std::forward<Args>(args)...);
+ }
+ }
+
+ /// Called when the element is recycled.
+ static void onRecycle(T* ptr) {
+ if (eagerRecycle()) {
+ ptr->~T();
+ }
+ }
+};
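+
+// A custom Traits type for IndexedMemPool only needs to provide initialize,
+// cleanup, onAllocate and onRecycle. As a sketch (Counter and its reset()
+// method are hypothetical), a Traits that keeps lazily constructed elements
+// alive but clears their state each time they are recycled could look like:
+//
+//   struct CounterTraits : IndexedMemPoolTraits<Counter, false, false> {
+//     static void onRecycle(Counter* ptr) { ptr->reset(); }
+//   };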
+
+/// IndexedMemPool traits that implement the lazy lifecycle strategy. In this
+/// strategy, elements are default-constructed the first time they are
+/// allocated, and destroyed when the pool itself is destroyed.
+template <typename T>
+using IndexedMemPoolTraitsLazyRecycle = IndexedMemPoolTraits<T, false, false>;
+
+/// IndexedMemPool traits that implement the eager lifecycle strategy. In this
+/// strategy, elements are constructed when they are allocated from the pool
+/// and destroyed when they are recycled.
+template <typename T>
+using IndexedMemPoolTraitsEagerRecycle = IndexedMemPoolTraits<T, true, true>;
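+
+// For example (a sketch; Widget is a hypothetical element type), forcing the
+// lazy strategy for a non-trivial Widget looks like:
+//
+//   using LazyWidgetPool = IndexedMemPool<
+//       Widget, 32, 200, std::atomic, IndexedMemPoolTraitsLazyRecycle<Widget>>;
+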
/// Instances of IndexedMemPool dynamically allocate and then pool their
/// element type (T), returning 4-byte integer indices that can be passed
/// there won't be an ABA match due to the element being overwritten with
/// a different type that has the same bit pattern.
///
-/// IndexedMemPool has two object lifecycle strategies. The first
-/// is to construct objects when they are allocated from the pool and
-/// destroy them when they are recycled. In this mode allocIndex and
-/// allocElem have emplace-like semantics. In the second mode, objects
-/// are default-constructed the first time they are removed from the pool,
-/// and deleted when the pool itself is deleted. By default the first
-/// mode is used for non-trivial T, and the second is used for trivial T.
+/// The object lifecycle strategy is controlled by the Traits parameter.
+/// One strategy, implemented by IndexedMemPoolTraitsEagerRecycle, is to
+/// construct objects when they are allocated from the pool and destroy
+/// them when they are recycled. In this strategy allocIndex and allocElem
+/// have emplace-like semantics. In the other strategy, implemented by
+/// IndexedMemPoolTraitsLazyRecycle, objects are default-constructed the
+/// first time they are removed from the pool, and destroyed when the pool
+/// itself is destroyed. By default the eager strategy is used for
+/// non-trivial T, and the lazy strategy is used for trivial T. Clients
+/// can customize the object lifecycle by providing their own Traits
+/// implementation. See IndexedMemPoolTraits for an example.
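+///
+/// For example (a sketch; Widget and its constructor are hypothetical), with
+/// the default Traits a non-trivial Widget uses the eager strategy:
+///
+///   IndexedMemPool<Widget> pool(1000);
+///   uint32_t idx = pool.allocIndex(42);  // emplace-like: constructs Widget(42)
+///   if (idx != 0) {
+///     Widget& w = pool[idx];
+///     // ... use w ...
+///     pool.recycleIndex(idx);  // runs ~Widget() and makes idx reusable
+///   }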
///
/// IMPORTANT: Space for extra elements is allocated to account for those
/// that are inaccessible because they are in other local lists, so the
/// constructed, but delays element construction. This means that only
/// elements that are actually returned to the caller get paged into the
/// process's resident set (RSS).
-template <typename T,
- int NumLocalLists_ = 32,
- int LocalListLimit_ = 200,
- template<typename> class Atom = std::atomic,
- bool EagerRecycleWhenTrivial = false,
- bool EagerRecycleWhenNotTrivial = true>
+template <
+ typename T,
+ uint32_t NumLocalLists_ = 32,
+ uint32_t LocalListLimit_ = 200,
+ template <typename> class Atom = std::atomic,
+ typename Traits = IndexedMemPoolTraits<T>>
struct IndexedMemPool : boost::noncopyable {
typedef T value_type;
LocalListLimit = LocalListLimit_
};
-
- static constexpr bool eagerRecycle() {
- return std::is_trivial<T>::value
- ? EagerRecycleWhenTrivial : EagerRecycleWhenNotTrivial;
- }
-
// these are public because clients may need to reason about the number
// of bits required to hold indices from a pool, given its capacity
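+  // For example, with the default NumLocalLists_ = 32 and LocalListLimit_ =
+  // 200, maxIndexForCapacity(1000) == 1000 + 31 * 200 == 7200: extra slots
+  // are reserved for elements stranded on other stripes' local lists.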
static constexpr uint32_t maxIndexForCapacity(uint32_t capacity) {
- // index of uint32_t(-1) == UINT32_MAX is reserved for isAllocated tracking
- return std::min(uint64_t(capacity) + (NumLocalLists - 1) * LocalListLimit,
- uint64_t(uint32_t(-1) - 1));
+ // index of std::numeric_limits<uint32_t>::max() is reserved for isAllocated
+ // tracking
+ return uint32_t(std::min(
+ uint64_t(capacity) + (NumLocalLists - 1) * LocalListLimit,
+ uint64_t(std::numeric_limits<uint32_t>::max() - 1)));
}
static constexpr uint32_t capacityForMaxIndex(uint32_t maxIndex) {
, globalHead_(TaggedPtr{})
{
const size_t needed = sizeof(Slot) * (actualCapacity_ + 1);
- long pagesize = sysconf(_SC_PAGESIZE);
+ size_t pagesize = size_t(sysconf(_SC_PAGESIZE));
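+    // round "needed" up to a whole multiple of the page size; e.g., with
+    // 4 KiB pages, needed == 5000 gives mmapLength_ == 8192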
mmapLength_ = ((needed - 1) & ~(pagesize - 1)) + pagesize;
assert(needed <= mmapLength_ && mmapLength_ < needed + pagesize);
assert((mmapLength_ % pagesize) == 0);
/// Destroys all of the contained elements
~IndexedMemPool() {
- if (!eagerRecycle()) {
- for (size_t i = size_; i > 0; --i) {
- slots_[i].~Slot();
- }
+ for (uint32_t i = maxAllocatedIndex(); i > 0; --i) {
+ Traits::cleanup(&slots_[i].elem);
}
munmap(slots_, mmapLength_);
}
/// simultaneously allocated and not yet recycled. Because of the
/// local lists it is possible that more elements than this are returned
/// successfully
- size_t capacity() {
+ uint32_t capacity() {
return capacityForMaxIndex(actualCapacity_);
}
+  /// Returns the maximum index of elements ever allocated in this pool,
+  /// including elements that have been recycled.
+ uint32_t maxAllocatedIndex() const {
+ // Take the minimum since it is possible that size_ > actualCapacity_.
+ // This can happen if there are multiple concurrent requests
+ // when size_ == actualCapacity_ - 1.
+ return std::min(uint32_t(size_), uint32_t(actualCapacity_));
+ }
+
/// Finds a slot with a non-zero index, emplaces a T there if we're
/// using the eager recycle lifecycle mode, and returns the index,
- /// or returns 0 if no elements are available.
+ /// or returns 0 if no elements are available. Passes a pointer to
+ /// the element to Traits::onAllocate before the slot is marked as
+ /// allocated.
template <typename ...Args>
uint32_t allocIndex(Args&&... args) {
- static_assert(sizeof...(Args) == 0 || eagerRecycle(),
- "emplace-style allocation requires eager recycle, "
- "which is defaulted only for non-trivial types");
auto idx = localPop(localHead());
- if (idx != 0 && eagerRecycle()) {
- T* ptr = &slot(idx).elem;
- new (ptr) T(std::forward<Args>(args)...);
+ if (idx != 0) {
+ Slot& s = slot(idx);
+ Traits::onAllocate(&s.elem, std::forward<Args>(args)...);
+ markAllocated(s);
}
return idx;
}
/// If an element is available, returns a std::unique_ptr to it that will
/// recycle the element to the pool when it is reclaimed, otherwise returns
- /// a null (falsy) std::unique_ptr
+ /// a null (falsy) std::unique_ptr. Passes a pointer to the element to
+ /// Traits::onAllocate before the slot is marked as allocated.
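+  ///
+  /// For example (a sketch; Widget, its constructor, and doSomething() are
+  /// hypothetical; the default Traits construct the element eagerly here):
+  ///
+  ///   IndexedMemPool<Widget> pool(100);
+  ///   if (auto ptr = pool.allocElem(42)) {
+  ///     ptr->doSomething();
+  ///   }  // the element is recycled when ptr goes out of scope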
template <typename ...Args>
UniquePtr allocElem(Args&&... args) {
auto idx = allocIndex(std::forward<Args>(args)...);
/// Gives up ownership previously granted by alloc()
void recycleIndex(uint32_t idx) {
assert(isAllocated(idx));
- if (eagerRecycle()) {
- slot(idx).elem.~T();
- }
localPush(localHead(), idx);
}
auto slot = reinterpret_cast<const Slot*>(
reinterpret_cast<const char*>(elem) - offsetof(Slot, elem));
- auto rv = slot - slots_;
+ auto rv = uint32_t(slot - slots_);
// this assert also tests that rv is in range
assert(elem == &(*this)[rv]);
/// Returns true iff idx has been alloc()ed and not recycleIndex()ed
bool isAllocated(uint32_t idx) const {
- return slot(idx).localNext == uint32_t(-1);
+ return slot(idx).localNext.load(std::memory_order_acquire) == uint32_t(-1);
}
struct Slot {
T elem;
- uint32_t localNext;
- uint32_t globalNext;
+ Atom<uint32_t> localNext;
+ Atom<uint32_t> globalNext;
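+    // the links above are loaded and stored concurrently (e.g. isAllocated()
+    // reads localNext while a recycling thread writes it), hence Atom<>
+    // rather than plain uint32_t
+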
Slot() : localNext{}, globalNext{} {}
};
////////// fields
+ /// the number of bytes allocated from mmap, which is a multiple of
+ /// the page size of the machine
+ size_t mmapLength_;
+
/// the actual number of slots that we will allocate, to guarantee
/// that we will satisfy the capacity requested at construction time.
/// They will be numbered 1..actualCapacity_ (note the 1-based counting),
/// and occupy slots_[1..actualCapacity_].
- size_t actualCapacity_;
-
- /// the number of bytes allocated from mmap, which is a multiple of
- /// the page size of the machine
- size_t mmapLength_;
+ uint32_t actualCapacity_;
/// this records the number of slots that have actually been constructed.
/// To allow use of atomic ++ instead of CAS, we let this overflow.
/// raw storage, only 1..min(size_,actualCapacity_) (inclusive) are
/// actually constructed. Note that slots_[0] is not constructed or used
- Slot* FOLLY_ALIGN_TO_AVOID_FALSE_SHARING slots_;
+ FOLLY_ALIGN_TO_AVOID_FALSE_SHARING Slot* slots_;
/// use AccessSpreader to find your list. We use stripes instead of
/// thread-local to avoid the need to grow or shrink on thread start
/// this is the head of a list of node chained by globalNext, that are
/// themselves each the head of a list chained by localNext
- AtomicStruct<TaggedPtr,Atom> FOLLY_ALIGN_TO_AVOID_FALSE_SHARING globalHead_;
+ FOLLY_ALIGN_TO_AVOID_FALSE_SHARING AtomicStruct<TaggedPtr,Atom> globalHead_;
///////////// private methods
- size_t slotIndex(uint32_t idx) const {
+ uint32_t slotIndex(uint32_t idx) const {
assert(0 < idx &&
idx <= actualCapacity_ &&
idx <= size_.load(std::memory_order_acquire));
void globalPush(Slot& s, uint32_t localHead) {
while (true) {
TaggedPtr gh = globalHead_.load(std::memory_order_acquire);
- s.globalNext = gh.idx;
+ s.globalNext.store(gh.idx, std::memory_order_relaxed);
if (globalHead_.compare_exchange_strong(gh, gh.withIdx(localHead))) {
// success
return;
Slot& s = slot(idx);
TaggedPtr h = head.load(std::memory_order_acquire);
+    // ensure onRecycle runs exactly once, even if the CAS below fails and
+    // the loop retries
+    bool recycled = false;
    while (true) {
-      s.localNext = h.idx;
+      s.localNext.store(h.idx, std::memory_order_release);
+      if (!recycled) {
+        Traits::onRecycle(&slot(idx).elem);
+        recycled = true;
+      }
if (h.size() == LocalListLimit) {
// push will overflow local list, steal it instead
uint32_t globalPop() {
while (true) {
TaggedPtr gh = globalHead_.load(std::memory_order_acquire);
- if (gh.idx == 0 || globalHead_.compare_exchange_strong(
- gh, gh.withIdx(slot(gh.idx).globalNext))) {
+ if (gh.idx == 0 ||
+ globalHead_.compare_exchange_strong(
+ gh,
+ gh.withIdx(
+ slot(gh.idx).globalNext.load(std::memory_order_relaxed)))) {
// global list is empty, or pop was successful
return gh.idx;
}
if (h.idx != 0) {
// local list is non-empty, try to pop
Slot& s = slot(h.idx);
- if (head.compare_exchange_strong(
- h, h.withIdx(s.localNext).withSizeDecr())) {
+ auto next = s.localNext.load(std::memory_order_relaxed);
+ if (head.compare_exchange_strong(h, h.withIdx(next).withSizeDecr())) {
// success
- s.localNext = uint32_t(-1);
return h.idx;
}
continue;
// allocation failed
return 0;
}
- // default-construct it now if we aren't going to construct and
- // destroy on each allocation
- if (!eagerRecycle()) {
- T* ptr = &slot(idx).elem;
- new (ptr) T();
- }
- slot(idx).localNext = uint32_t(-1);
+ Traits::initialize(&slot(idx).elem);
return idx;
}
Slot& s = slot(idx);
+ auto next = s.localNext.load(std::memory_order_relaxed);
if (head.compare_exchange_strong(
- h, h.withIdx(s.localNext).withSize(LocalListLimit))) {
+ h, h.withIdx(next).withSize(LocalListLimit))) {
// global list moved to local list, keep head for us
- s.localNext = uint32_t(-1);
return idx;
}
// local bulk push failed, return idx to the global list and try again
}
AtomicStruct<TaggedPtr,Atom>& localHead() {
- auto stripe = detail::AccessSpreader<Atom>::current(NumLocalLists);
+ auto stripe = AccessSpreader<Atom>::current(NumLocalLists);
return local_[stripe].head;
}
+
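+  /// Sets localNext to the reserved sentinel uint32_t(-1); the release store
+  /// pairs with the acquire load in isAllocated().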
+ void markAllocated(Slot& slot) {
+ slot.localNext.store(uint32_t(-1), std::memory_order_release);
+ }
+
+ public:
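+  /// the size in bytes of one Slot, i.e. the element plus the two list links
+  /// (including any alignment padding)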
+ static constexpr std::size_t kSlotSize = sizeof(Slot);
};
namespace detail {
}
};
-}
+} // namespace detail
} // namespace folly
-# pragma GCC diagnostic pop
-#endif
+FOLLY_POP_WARNING