X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FIndexedMemPool.h;h=14661dd7d60980337198c31b8c1ef3034563338a;hp=e1b7e43efc38d28f0417f2db5a0fd2323e319cb1;hb=ee1a988dbb1648c25d1cf1c40eafbf2e7bff81b4;hpb=a0359850fd45080ff0862ca0a11884efb424d7d2 diff --git a/folly/IndexedMemPool.h b/folly/IndexedMemPool.h index e1b7e43e..14661dd7 100644 --- a/folly/IndexedMemPool.h +++ b/folly/IndexedMemPool.h @@ -1,5 +1,5 @@ /* - * Copyright 2015 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,28 +14,90 @@ * limitations under the License. */ -#ifndef FOLLY_INDEXEDMEMPOOL_H -#define FOLLY_INDEXEDMEMPOOL_H +#pragma once -#include -#include #include -#include -#include +#include +#include + +#include + #include -#include -#include +#include +#include +#include +#include +#include // Ignore shadowing warnings within this file, so includers can use -Wshadow. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" +FOLLY_PUSH_WARNING +FOLLY_GCC_DISABLE_WARNING("-Wshadow") namespace folly { namespace detail { template struct IndexedMemPoolRecycler; -} +} // namespace detail + +template < + typename T, + bool EagerRecycleWhenTrivial = false, + bool EagerRecycleWhenNotTrivial = true> +struct IndexedMemPoolTraits { + static constexpr bool eagerRecycle() { + return std::is_trivial::value ? EagerRecycleWhenTrivial + : EagerRecycleWhenNotTrivial; + } + + /// Called when the element pointed to by ptr is allocated for the + /// first time. + static void initialize(T* ptr) { + if (!eagerRecycle()) { + new (ptr) T(); + } + } + + /// Called when the element pointed to by ptr is freed at the pool + /// destruction time. + static void cleanup(T* ptr) { + if (!eagerRecycle()) { + ptr->~T(); + } + } + + /// Called when the element is allocated with the arguments forwarded from + /// IndexedMemPool::allocElem. + template + static void onAllocate(T* ptr, Args&&... args) { + static_assert( + sizeof...(Args) == 0 || eagerRecycle(), + "emplace-style allocation requires eager recycle, " + "which is defaulted only for non-trivial types"); + if (eagerRecycle()) { + new (ptr) T(std::forward(args)...); + } + } + + /// Called when the element is recycled. + static void onRecycle(T* ptr) { + if (eagerRecycle()) { + ptr->~T(); + } + } +}; + +/// IndexedMemPool traits that implements the lazy lifecycle strategy. In this +/// strategy elements are default-constructed the first time they are allocated, +/// and destroyed when the pool itself is destroyed. +template +using IndexedMemPoolTraitsLazyRecycle = IndexedMemPoolTraits; + +/// IndexedMemPool traits that implements the eager lifecycle strategy. In this +/// strategy elements are constructed when they are allocated from the pool and +/// destroyed when recycled. +template +using IndexedMemPoolTraitsEagerRecycle = IndexedMemPoolTraits; /// Instances of IndexedMemPool dynamically allocate and then pool their /// element type (T), returning 4-byte integer indices that can be passed @@ -54,13 +116,17 @@ struct IndexedMemPoolRecycler; /// there won't be an ABA match due to the element being overwritten with /// a different type that has the same bit pattern. /// -/// IndexedMemPool has two object lifecycle strategies. The first -/// is to construct objects when they are allocated from the pool and -/// destroy them when they are recycled. In this mode allocIndex and -/// allocElem have emplace-like semantics. In the second mode, objects -/// are default-constructed the first time they are removed from the pool, -/// and deleted when the pool itself is deleted. By default the first -/// mode is used for non-trivial T, and the second is used for trivial T. +/// The object lifecycle strategy is controlled by the Traits parameter. +/// One strategy, implemented by IndexedMemPoolTraitsEagerRecycle, is to +/// construct objects when they are allocated from the pool and destroy +/// them when they are recycled. In this mode allocIndex and allocElem +/// have emplace-like semantics. In another strategy, implemented by +/// IndexedMemPoolTraitsLazyRecycle, objects are default-constructed the +/// first time they are removed from the pool, and deleted when the pool +/// itself is deleted. By default the first mode is used for non-trivial +/// T, and the second is used for trivial T. Clients can customize the +/// object lifecycle by providing their own Traits implementation. +/// See IndexedMemPoolTraits for a Traits example. /// /// IMPORTANT: Space for extra elements is allocated to account for those /// that are inaccessible because they are in other local lists, so the @@ -84,12 +150,12 @@ struct IndexedMemPoolRecycler; /// constructed, but delays element construction. This means that only /// elements that are actually returned to the caller get paged into the /// process's resident set (RSS). -template class Atom = std::atomic, - bool EagerRecycleWhenTrivial = false, - bool EagerRecycleWhenNotTrivial = true> +template < + typename T, + uint32_t NumLocalLists_ = 32, + uint32_t LocalListLimit_ = 200, + template class Atom = std::atomic, + typename Traits = IndexedMemPoolTraits> struct IndexedMemPool : boost::noncopyable { typedef T value_type; @@ -102,19 +168,15 @@ struct IndexedMemPool : boost::noncopyable { LocalListLimit = LocalListLimit_ }; - - static constexpr bool eagerRecycle() { - return std::is_trivial::value - ? EagerRecycleWhenTrivial : EagerRecycleWhenNotTrivial; - } - // these are public because clients may need to reason about the number // of bits required to hold indices from a pool, given its capacity static constexpr uint32_t maxIndexForCapacity(uint32_t capacity) { - // index of uint32_t(-1) == UINT32_MAX is reserved for isAllocated tracking - return std::min(uint64_t(capacity) + (NumLocalLists - 1) * LocalListLimit, - uint64_t(uint32_t(-1) - 1)); + // index of std::numeric_limits::max() is reserved for isAllocated + // tracking + return uint32_t(std::min( + uint64_t(capacity) + (NumLocalLists - 1) * LocalListLimit, + uint64_t(std::numeric_limits::max() - 1))); } static constexpr uint32_t capacityForMaxIndex(uint32_t maxIndex) { @@ -130,7 +192,7 @@ struct IndexedMemPool : boost::noncopyable { , globalHead_(TaggedPtr{}) { const size_t needed = sizeof(Slot) * (actualCapacity_ + 1); - long pagesize = sysconf(_SC_PAGESIZE); + size_t pagesize = size_t(sysconf(_SC_PAGESIZE)); mmapLength_ = ((needed - 1) & ~(pagesize - 1)) + pagesize; assert(needed <= mmapLength_ && mmapLength_ < needed + pagesize); assert((mmapLength_ % pagesize) == 0); @@ -146,10 +208,8 @@ struct IndexedMemPool : boost::noncopyable { /// Destroys all of the contained elements ~IndexedMemPool() { - if (!eagerRecycle()) { - for (size_t i = size_; i > 0; --i) { - slots_[i].~Slot(); - } + for (uint32_t i = maxAllocatedIndex(); i > 0; --i) { + Traits::cleanup(&slots_[i].elem); } munmap(slots_, mmapLength_); } @@ -158,29 +218,39 @@ struct IndexedMemPool : boost::noncopyable { /// simultaneously allocated and not yet recycled. Because of the /// local lists it is possible that more elements than this are returned /// successfully - size_t capacity() { + uint32_t capacity() { return capacityForMaxIndex(actualCapacity_); } + /// Returns the maximum index of elements ever allocated in this pool + /// including elements that have been recycled. + uint32_t maxAllocatedIndex() const { + // Take the minimum since it is possible that size_ > actualCapacity_. + // This can happen if there are multiple concurrent requests + // when size_ == actualCapacity_ - 1. + return std::min(uint32_t(size_), uint32_t(actualCapacity_)); + } + /// Finds a slot with a non-zero index, emplaces a T there if we're /// using the eager recycle lifecycle mode, and returns the index, - /// or returns 0 if no elements are available. + /// or returns 0 if no elements are available. Passes a pointer to + /// the element to Traits::onAllocate before the slot is marked as + /// allocated. template uint32_t allocIndex(Args&&... args) { - static_assert(sizeof...(Args) == 0 || eagerRecycle(), - "emplace-style allocation requires eager recycle, " - "which is defaulted only for non-trivial types"); auto idx = localPop(localHead()); - if (idx != 0 && eagerRecycle()) { - T* ptr = &slot(idx).elem; - new (ptr) T(std::forward(args)...); + if (idx != 0) { + Slot& s = slot(idx); + Traits::onAllocate(&s.elem, std::forward(args)...); + markAllocated(s); } return idx; } /// If an element is available, returns a std::unique_ptr to it that will /// recycle the element to the pool when it is reclaimed, otherwise returns - /// a null (falsy) std::unique_ptr + /// a null (falsy) std::unique_ptr. Passes a pointer to the element to + /// Traits::onAllocate before the slot is marked as allocated. template UniquePtr allocElem(Args&&... args) { auto idx = allocIndex(std::forward(args)...); @@ -191,9 +261,6 @@ struct IndexedMemPool : boost::noncopyable { /// Gives up ownership previously granted by alloc() void recycleIndex(uint32_t idx) { assert(isAllocated(idx)); - if (eagerRecycle()) { - slot(idx).elem.~T(); - } localPush(localHead(), idx); } @@ -218,7 +285,7 @@ struct IndexedMemPool : boost::noncopyable { auto slot = reinterpret_cast( reinterpret_cast(elem) - offsetof(Slot, elem)); - auto rv = slot - slots_; + auto rv = uint32_t(slot - slots_); // this assert also tests that rv is in range assert(elem == &(*this)[rv]); @@ -227,7 +294,7 @@ struct IndexedMemPool : boost::noncopyable { /// Returns true iff idx has been alloc()ed and not recycleIndex()ed bool isAllocated(uint32_t idx) const { - return slot(idx).localNext == uint32_t(-1); + return slot(idx).localNext.load(std::memory_order_acquire) == uint32_t(-1); } @@ -236,8 +303,8 @@ struct IndexedMemPool : boost::noncopyable { struct Slot { T elem; - uint32_t localNext; - uint32_t globalNext; + Atom localNext; + Atom globalNext; Slot() : localNext{}, globalNext{} {} }; @@ -284,7 +351,7 @@ struct IndexedMemPool : boost::noncopyable { } }; - struct FOLLY_ALIGN_TO_AVOID_FALSE_SHARING LocalList { + struct alignas(hardware_destructive_interference_size) LocalList { AtomicStruct head; LocalList() : head(TaggedPtr{}) {} @@ -292,25 +359,25 @@ struct IndexedMemPool : boost::noncopyable { ////////// fields + /// the number of bytes allocated from mmap, which is a multiple of + /// the page size of the machine + size_t mmapLength_; + /// the actual number of slots that we will allocate, to guarantee /// that we will satisfy the capacity requested at construction time. /// They will be numbered 1..actualCapacity_ (note the 1-based counting), /// and occupy slots_[1..actualCapacity_]. - size_t actualCapacity_; - - /// the number of bytes allocated from mmap, which is a multiple of - /// the page size of the machine - size_t mmapLength_; + uint32_t actualCapacity_; /// this records the number of slots that have actually been constructed. /// To allow use of atomic ++ instead of CAS, we let this overflow. /// The actual number of constructed elements is min(actualCapacity_, /// size_) - std::atomic size_; + Atom size_; /// raw storage, only 1..min(size_,actualCapacity_) (inclusive) are /// actually constructed. Note that slots_[0] is not constructed or used - Slot* FOLLY_ALIGN_TO_AVOID_FALSE_SHARING slots_; + alignas(hardware_destructive_interference_size) Slot* slots_; /// use AccessSpreader to find your list. We use stripes instead of /// thread-local to avoid the need to grow or shrink on thread start @@ -319,11 +386,12 @@ struct IndexedMemPool : boost::noncopyable { /// this is the head of a list of node chained by globalNext, that are /// themselves each the head of a list chained by localNext - AtomicStruct FOLLY_ALIGN_TO_AVOID_FALSE_SHARING globalHead_; + alignas(hardware_destructive_interference_size) + AtomicStruct globalHead_; ///////////// private methods - size_t slotIndex(uint32_t idx) const { + uint32_t slotIndex(uint32_t idx) const { assert(0 < idx && idx <= actualCapacity_ && idx <= size_.load(std::memory_order_acquire)); @@ -343,7 +411,7 @@ struct IndexedMemPool : boost::noncopyable { void globalPush(Slot& s, uint32_t localHead) { while (true) { TaggedPtr gh = globalHead_.load(std::memory_order_acquire); - s.globalNext = gh.idx; + s.globalNext.store(gh.idx, std::memory_order_relaxed); if (globalHead_.compare_exchange_strong(gh, gh.withIdx(localHead))) { // success return; @@ -356,7 +424,8 @@ struct IndexedMemPool : boost::noncopyable { Slot& s = slot(idx); TaggedPtr h = head.load(std::memory_order_acquire); while (true) { - s.localNext = h.idx; + s.localNext.store(h.idx, std::memory_order_release); + Traits::onRecycle(&slot(idx).elem); if (h.size() == LocalListLimit) { // push will overflow local list, steal it instead @@ -380,8 +449,11 @@ struct IndexedMemPool : boost::noncopyable { uint32_t globalPop() { while (true) { TaggedPtr gh = globalHead_.load(std::memory_order_acquire); - if (gh.idx == 0 || globalHead_.compare_exchange_strong( - gh, gh.withIdx(slot(gh.idx).globalNext))) { + if (gh.idx == 0 || + globalHead_.compare_exchange_strong( + gh, + gh.withIdx( + slot(gh.idx).globalNext.load(std::memory_order_relaxed)))) { // global list is empty, or pop was successful return gh.idx; } @@ -395,10 +467,9 @@ struct IndexedMemPool : boost::noncopyable { if (h.idx != 0) { // local list is non-empty, try to pop Slot& s = slot(h.idx); - if (head.compare_exchange_strong( - h, h.withIdx(s.localNext).withSizeDecr())) { + auto next = s.localNext.load(std::memory_order_relaxed); + if (head.compare_exchange_strong(h, h.withIdx(next).withSizeDecr())) { // success - s.localNext = uint32_t(-1); return h.idx; } continue; @@ -412,21 +483,15 @@ struct IndexedMemPool : boost::noncopyable { // allocation failed return 0; } - // default-construct it now if we aren't going to construct and - // destroy on each allocation - if (!eagerRecycle()) { - T* ptr = &slot(idx).elem; - new (ptr) T(); - } - slot(idx).localNext = uint32_t(-1); + Traits::initialize(&slot(idx).elem); return idx; } Slot& s = slot(idx); + auto next = s.localNext.load(std::memory_order_relaxed); if (head.compare_exchange_strong( - h, h.withIdx(s.localNext).withSize(LocalListLimit))) { + h, h.withIdx(next).withSize(LocalListLimit))) { // global list moved to local list, keep head for us - s.localNext = uint32_t(-1); return idx; } // local bulk push failed, return idx to the global list and try again @@ -435,9 +500,16 @@ struct IndexedMemPool : boost::noncopyable { } AtomicStruct& localHead() { - auto stripe = detail::AccessSpreader::current(NumLocalLists); + auto stripe = AccessSpreader::current(NumLocalLists); return local_[stripe].head; } + + void markAllocated(Slot& slot) { + slot.localNext.store(uint32_t(-1), std::memory_order_release); + } + + public: + static constexpr std::size_t kSlotSize = sizeof(Slot); }; namespace detail { @@ -461,9 +533,8 @@ struct IndexedMemPoolRecycler { } }; -} +} // namespace detail } // namespace folly -# pragma GCC diagnostic pop -#endif +FOLLY_POP_WARNING