X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FIndexedMemPool.h;h=d9e84935a077cedec6acc8fd75fe729d0507fed1;hb=b8ec2ef5137cb75cb75bb27c6b003c5f4e21bcb2;hp=5369ab7f6bc8d1f9208d69d5b42b0aa958df2080;hpb=275ca94d04e44f28cfa411668eb1c1dd8db90b80;p=folly.git diff --git a/folly/IndexedMemPool.h b/folly/IndexedMemPool.h index 5369ab7f..d9e84935 100644 --- a/folly/IndexedMemPool.h +++ b/folly/IndexedMemPool.h @@ -1,5 +1,5 @@ /* - * Copyright 2015 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,16 +14,17 @@ * limitations under the License. */ -#ifndef FOLLY_INDEXEDMEMPOOL_H -#define FOLLY_INDEXEDMEMPOOL_H +#pragma once -#include +#include #include -#include -#include +#include +#include #include #include #include +#include +#include // Ignore shadowing warnings within this file, so includers can use -Wshadow. #pragma GCC diagnostic push @@ -36,21 +37,30 @@ template struct IndexedMemPoolRecycler; } -/// Instances of IndexedMemPool dynamically allocate and then pool -/// their element type (T), returning 4-byte integer indices that can be -/// passed to the pool's operator[] method to access or obtain pointers -/// to the actual elements. Once they are constructed, elements are -/// never destroyed. These two features are useful for lock-free -/// algorithms. The indexing behavior makes it easy to build tagged -/// pointer-like-things, since a large number of elements can be managed -/// using fewer bits than a full pointer. The pooling behavior makes -/// it safe to read from T-s even after they have been recycled, since -/// it is guaranteed that the memory won't have been returned to the OS -/// and unmapped (the algorithm must still use a mechanism to validate -/// that the read was correct, but it doesn't have to worry about page -/// faults), and if the elements use internal sequence numbers it can be -/// guaranteed that there won't be an ABA match due to the element being -/// overwritten with a different type that has the same bit pattern. +/// Instances of IndexedMemPool dynamically allocate and then pool their +/// element type (T), returning 4-byte integer indices that can be passed +/// to the pool's operator[] method to access or obtain pointers to the +/// actual elements. The memory backing items returned from the pool +/// will always be readable, even if items have been returned to the pool. +/// These two features are useful for lock-free algorithms. The indexing +/// behavior makes it easy to build tagged pointer-like-things, since +/// a large number of elements can be managed using fewer bits than a +/// full pointer. The access-after-free behavior makes it safe to read +/// from T-s even after they have been recycled, since it is guaranteed +/// that the memory won't have been returned to the OS and unmapped +/// (the algorithm must still use a mechanism to validate that the read +/// was correct, but it doesn't have to worry about page faults), and if +/// the elements use internal sequence numbers it can be guaranteed that +/// there won't be an ABA match due to the element being overwritten with +/// a different type that has the same bit pattern. +/// +/// IndexedMemPool has two object lifecycle strategies. The first +/// is to construct objects when they are allocated from the pool and +/// destroy them when they are recycled. In this mode allocIndex and +/// allocElem have emplace-like semantics. 
In the second mode, objects +/// are default-constructed the first time they are removed from the pool, +/// and deleted when the pool itself is deleted. By default the first +/// mode is used for non-trivial T, and the second is used for trivial T. /// /// IMPORTANT: Space for extra elements is allocated to account for those /// that are inaccessible because they are in other local lists, so the @@ -74,10 +84,13 @@ struct IndexedMemPoolRecycler; /// constructed, but delays element construction. This means that only /// elements that are actually returned to the caller get paged into the /// process's resident set (RSS). -template class Atom = std::atomic> +template < + typename T, + uint32_t NumLocalLists_ = 32, + uint32_t LocalListLimit_ = 200, + template class Atom = std::atomic, + bool EagerRecycleWhenTrivial = false, + bool EagerRecycleWhenNotTrivial = true> struct IndexedMemPool : boost::noncopyable { typedef T value_type; @@ -91,13 +104,20 @@ struct IndexedMemPool : boost::noncopyable { }; + static constexpr bool eagerRecycle() { + return std::is_trivial::value + ? EagerRecycleWhenTrivial : EagerRecycleWhenNotTrivial; + } + // these are public because clients may need to reason about the number // of bits required to hold indices from a pool, given its capacity static constexpr uint32_t maxIndexForCapacity(uint32_t capacity) { - // index of uint32_t(-1) == UINT32_MAX is reserved for isAllocated tracking - return std::min(uint64_t(capacity) + (NumLocalLists - 1) * LocalListLimit, - uint64_t(uint32_t(-1) - 1)); + // index of std::numeric_limits::max() is reserved for isAllocated + // tracking + return uint32_t(std::min( + uint64_t(capacity) + (NumLocalLists - 1) * LocalListLimit, + uint64_t(std::numeric_limits::max() - 1))); } static constexpr uint32_t capacityForMaxIndex(uint32_t maxIndex) { @@ -113,7 +133,7 @@ struct IndexedMemPool : boost::noncopyable { , globalHead_(TaggedPtr{}) { const size_t needed = sizeof(Slot) * (actualCapacity_ + 1); - long pagesize = sysconf(_SC_PAGESIZE); + size_t pagesize = size_t(sysconf(_SC_PAGESIZE)); mmapLength_ = ((needed - 1) & ~(pagesize - 1)) + pagesize; assert(needed <= mmapLength_ && mmapLength_ < needed + pagesize); assert((mmapLength_ % pagesize) == 0); @@ -129,8 +149,14 @@ struct IndexedMemPool : boost::noncopyable { /// Destroys all of the contained elements ~IndexedMemPool() { - for (size_t i = size_; i > 0; --i) { - slots_[i].~Slot(); + if (!eagerRecycle()) { + // Take the minimum since it is possible that size_ > actualCapacity_. + // This can happen if there are multiple concurrent requests + // when size_ == actualCapacity_ - 1. + uint32_t last = std::min(uint32_t(size_), uint32_t(actualCapacity_)); + for (uint32_t i = last; i > 0; --i) { + slots_[i].~Slot(); + } } munmap(slots_, mmapLength_); } @@ -139,28 +165,42 @@ struct IndexedMemPool : boost::noncopyable { /// simultaneously allocated and not yet recycled. Because of the /// local lists it is possible that more elements than this are returned /// successfully - size_t capacity() { + uint32_t capacity() { return capacityForMaxIndex(actualCapacity_); } - /// Grants ownership of (*this)[retval], or returns 0 if no elements - /// are available - uint32_t allocIndex() { - return localPop(localHead()); + /// Finds a slot with a non-zero index, emplaces a T there if we're + /// using the eager recycle lifecycle mode, and returns the index, + /// or returns 0 if no elements are available. + template + uint32_t allocIndex(Args&&... 
args) { + static_assert(sizeof...(Args) == 0 || eagerRecycle(), + "emplace-style allocation requires eager recycle, " + "which is defaulted only for non-trivial types"); + auto idx = localPop(localHead()); + if (idx != 0 && eagerRecycle()) { + T* ptr = &slot(idx).elem; + new (ptr) T(std::forward(args)...); + } + return idx; } /// If an element is available, returns a std::unique_ptr to it that will /// recycle the element to the pool when it is reclaimed, otherwise returns /// a null (falsy) std::unique_ptr - UniquePtr allocElem() { - auto idx = allocIndex(); - auto ptr = idx == 0 ? nullptr : &slot(idx).elem; + template + UniquePtr allocElem(Args&&... args) { + auto idx = allocIndex(std::forward(args)...); + T* ptr = idx == 0 ? nullptr : &slot(idx).elem; return UniquePtr(ptr, typename UniquePtr::deleter_type(this)); } /// Gives up ownership previously granted by alloc() void recycleIndex(uint32_t idx) { assert(isAllocated(idx)); + if (eagerRecycle()) { + slot(idx).elem.~T(); + } localPush(localHead(), idx); } @@ -185,7 +225,7 @@ struct IndexedMemPool : boost::noncopyable { auto slot = reinterpret_cast( reinterpret_cast(elem) - offsetof(Slot, elem)); - auto rv = slot - slots_; + auto rv = uint32_t(slot - slots_); // this assert also tests that rv is in range assert(elem == &(*this)[rv]); @@ -194,7 +234,7 @@ struct IndexedMemPool : boost::noncopyable { /// Returns true iff idx has been alloc()ed and not recycleIndex()ed bool isAllocated(uint32_t idx) const { - return slot(idx).localNext == uint32_t(-1); + return slot(idx).localNext.load(std::memory_order_relaxed) == uint32_t(-1); } @@ -203,8 +243,8 @@ struct IndexedMemPool : boost::noncopyable { struct Slot { T elem; - uint32_t localNext; - uint32_t globalNext; + Atom localNext; + Atom globalNext; Slot() : localNext{}, globalNext{} {} }; @@ -259,25 +299,25 @@ struct IndexedMemPool : boost::noncopyable { ////////// fields + /// the number of bytes allocated from mmap, which is a multiple of + /// the page size of the machine + size_t mmapLength_; + /// the actual number of slots that we will allocate, to guarantee /// that we will satisfy the capacity requested at construction time. /// They will be numbered 1..actualCapacity_ (note the 1-based counting), /// and occupy slots_[1..actualCapacity_]. - size_t actualCapacity_; - - /// the number of bytes allocated from mmap, which is a multiple of - /// the page size of the machine - size_t mmapLength_; + uint32_t actualCapacity_; /// this records the number of slots that have actually been constructed. /// To allow use of atomic ++ instead of CAS, we let this overflow. /// The actual number of constructed elements is min(actualCapacity_, /// size_) - std::atomic size_; + Atom size_; /// raw storage, only 1..min(size_,actualCapacity_) (inclusive) are /// actually constructed. Note that slots_[0] is not constructed or used - Slot* FOLLY_ALIGN_TO_AVOID_FALSE_SHARING slots_; + FOLLY_ALIGN_TO_AVOID_FALSE_SHARING Slot* slots_; /// use AccessSpreader to find your list. 
We use stripes instead of /// thread-local to avoid the need to grow or shrink on thread start @@ -286,11 +326,11 @@ struct IndexedMemPool : boost::noncopyable { /// this is the head of a list of node chained by globalNext, that are /// themselves each the head of a list chained by localNext - AtomicStruct FOLLY_ALIGN_TO_AVOID_FALSE_SHARING globalHead_; + FOLLY_ALIGN_TO_AVOID_FALSE_SHARING AtomicStruct globalHead_; ///////////// private methods - size_t slotIndex(uint32_t idx) const { + uint32_t slotIndex(uint32_t idx) const { assert(0 < idx && idx <= actualCapacity_ && idx <= size_.load(std::memory_order_acquire)); @@ -310,7 +350,7 @@ struct IndexedMemPool : boost::noncopyable { void globalPush(Slot& s, uint32_t localHead) { while (true) { TaggedPtr gh = globalHead_.load(std::memory_order_acquire); - s.globalNext = gh.idx; + s.globalNext.store(gh.idx, std::memory_order_relaxed); if (globalHead_.compare_exchange_strong(gh, gh.withIdx(localHead))) { // success return; @@ -323,7 +363,7 @@ struct IndexedMemPool : boost::noncopyable { Slot& s = slot(idx); TaggedPtr h = head.load(std::memory_order_acquire); while (true) { - s.localNext = h.idx; + s.localNext.store(h.idx, std::memory_order_relaxed); if (h.size() == LocalListLimit) { // push will overflow local list, steal it instead @@ -347,8 +387,11 @@ struct IndexedMemPool : boost::noncopyable { uint32_t globalPop() { while (true) { TaggedPtr gh = globalHead_.load(std::memory_order_acquire); - if (gh.idx == 0 || globalHead_.compare_exchange_strong( - gh, gh.withIdx(slot(gh.idx).globalNext))) { + if (gh.idx == 0 || + globalHead_.compare_exchange_strong( + gh, + gh.withIdx( + slot(gh.idx).globalNext.load(std::memory_order_relaxed)))) { // global list is empty, or pop was successful return gh.idx; } @@ -362,10 +405,10 @@ struct IndexedMemPool : boost::noncopyable { if (h.idx != 0) { // local list is non-empty, try to pop Slot& s = slot(h.idx); - if (head.compare_exchange_strong( - h, h.withIdx(s.localNext).withSizeDecr())) { + auto next = s.localNext.load(std::memory_order_relaxed); + if (head.compare_exchange_strong(h, h.withIdx(next).withSizeDecr())) { // success - s.localNext = uint32_t(-1); + s.localNext.store(uint32_t(-1), std::memory_order_relaxed); return h.idx; } continue; @@ -379,17 +422,22 @@ struct IndexedMemPool : boost::noncopyable { // allocation failed return 0; } - // construct it - new (&slot(idx)) T(); - slot(idx).localNext = uint32_t(-1); + // default-construct it now if we aren't going to construct and + // destroy on each allocation + if (!eagerRecycle()) { + T* ptr = &slot(idx).elem; + new (ptr) T(); + } + slot(idx).localNext.store(uint32_t(-1), std::memory_order_relaxed); return idx; } Slot& s = slot(idx); + auto next = s.localNext.load(std::memory_order_relaxed); if (head.compare_exchange_strong( - h, h.withIdx(s.localNext).withSize(LocalListLimit))) { + h, h.withIdx(next).withSize(LocalListLimit))) { // global list moved to local list, keep head for us - s.localNext = uint32_t(-1); + s.localNext.store(uint32_t(-1), std::memory_order_relaxed); return idx; } // local bulk push failed, return idx to the global list and try again @@ -429,4 +477,3 @@ struct IndexedMemPoolRecycler { } // namespace folly # pragma GCC diagnostic pop -#endif
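The hunks above switch IndexedMemPool to an emplace-style lifecycle for non-trivial T: allocIndex/allocElem forward their arguments to T's constructor, and recycleIndex runs the destructor. A minimal usage sketch follows, assuming the pool's constructor takes the requested capacity (its signature is not visible in this diff) and using a hypothetical Widget element type:

#include <folly/IndexedMemPool.h>
#include <cassert>
#include <cstdint>
#include <string>

// Hypothetical element type; any non-trivial T defaults to the
// eager-recycle lifecycle introduced in this diff.
struct Widget {
  std::string name;
  explicit Widget(std::string n) : name(std::move(n)) {}
};

int main() {
  folly::IndexedMemPool<Widget> pool(1024);  // assumed capacity constructor

  // allocIndex forwards to Widget's constructor and returns a 4-byte index;
  // 0 means no element was available.
  uint32_t idx = pool.allocIndex(std::string("hello"));
  if (idx != 0) {
    assert(pool.isAllocated(idx));
    Widget& w = pool[idx];               // index -> element reference
    assert(pool.locateElem(&w) == idx);  // element pointer -> index
    pool.recycleIndex(idx);              // destroys w, index becomes reusable
  }

  // allocElem wraps the same allocation in a unique_ptr whose deleter
  // recycles the element back into the pool.
  auto elem = pool.allocElem(std::string("world"));
  if (elem) {
    // use *elem; it is recycled automatically when elem goes out of scope
  }
  return 0;
}

For trivial T the second lifecycle mode is selected by default: elements are default-constructed the first time they leave the pool, destroyed only when the pool itself is destroyed, and allocIndex must then be called with no constructor arguments (the static_assert in allocIndex enforces this).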
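The header comment also explains why indices (rather than raw pointers) help lock-free algorithms: they fit in fewer bits than a pointer, so they can be combined with a tag to defeat ABA, which is the same idea the pool's internal TaggedPtr list heads use. A standalone sketch of that technique, using hypothetical names that do not appear in the header:

#include <atomic>
#include <cstdint>

// A 32-bit pool index plus a 32-bit version tag packed into one 64-bit
// word, so a single lock-free compare-exchange can detect that an index
// was recycled and reused in the meantime (the ABA scenario).
struct TaggedIndex {
  uint64_t raw;

  static TaggedIndex make(uint32_t idx, uint32_t tag) {
    return TaggedIndex{(uint64_t(tag) << 32) | idx};
  }
  uint32_t index() const { return uint32_t(raw); }
  uint32_t tag() const { return uint32_t(raw >> 32); }
};

// Head of a Treiber-style stack of pool indices; the tag is bumped on
// every successful update so a stale head value cannot silently match.
std::atomic<uint64_t> head{0};

bool tryReplaceHead(TaggedIndex expected, uint32_t newIdx) {
  uint64_t want = expected.raw;
  TaggedIndex next = TaggedIndex::make(newIdx, expected.tag() + 1);
  return head.compare_exchange_strong(want, next.raw);
}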