diff --git a/folly/FBVector.h b/folly/FBVector.h
index b9cecd93..428eba62 100644
--- a/folly/FBVector.h
+++ b/folly/FBVector.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,627 +14,1066 @@
  * limitations under the License.
  */

-// Andrei Alexandrescu (aalexandre)
-
-/**
- * Vector type. Drop-in replacement for std::vector featuring
- * significantly faster primitives, see e.g. benchmark results at
- * https://phabricator.fb.com/D235852.
- *
- * In order for a type to be used with fbvector, it must be
- * relocatable, see Traits.h.
- *
- * For user-defined types you must specialize templates
- * appropriately. Consult Traits.h for ways to do so and for a handy
- * family of macros FOLLY_ASSUME_FBVECTOR_COMPATIBLE*.
+/*
+ * Nicholas Ormrod (njormrod)
+ * Andrei Alexandrescu (aalexandre)
  *
- * For more information and documentation see folly/docs/FBVector.md
+ * FBVector is Facebook's drop-in implementation of std::vector. It has special
+ * optimizations for use with relocatable types and jemalloc.
  */

-#ifndef FOLLY_FBVECTOR_H_
-#define FOLLY_FBVECTOR_H_
+#pragma once
+
+//=============================================================================
+// headers

-#include "folly/Foreach.h"
-#include "folly/Malloc.h"
-#include "folly/Traits.h"
-#include <boost/operators.hpp>
-#include <boost/type_traits.hpp>
-#include <initializer_list>
-#include <limits>
 #include <algorithm>
 #include <cassert>
+#include <iterator>
 #include <memory>
 #include <stdexcept>
 #include <type_traits>
+#include <utility>
+
+#include <folly/FormatTraits.h>
+#include <folly/Likely.h>
+#include <folly/Malloc.h>
+#include <folly/Traits.h>
+#include <folly/portability/BitsFunctexcept.h>
+
+//=============================================================================
+// forward declaration

 namespace folly {
-/**
- * Forward declaration for use by FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2,
- * see folly/Traits.h.
- */
-template <class T, class Allocator = std::allocator<T> >
+template <class T, class Allocator = std::allocator<T>>
 class fbvector;
-}
+} // namespace folly

-// You can define an fbvector of fbvectors.
-FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2(folly::fbvector);
+//=============================================================================
+// unrolling
+
+#define FOLLY_FBV_UNROLL_PTR(first, last, OP) do {  \
+  for (; (last) - (first) >= 4; (first) += 4) {     \
+    OP(((first) + 0));                              \
+    OP(((first) + 1));                              \
+    OP(((first) + 2));                              \
+    OP(((first) + 3));                              \
+  }                                                 \
+  for (; (first) != (last); ++(first)) OP((first)); \
+} while(0);
+
+//=============================================================================
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+//                              fbvector class                               //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////

 namespace folly {
-namespace fbvector_detail {
-/**
- * isForwardIterator::value yields true if T is a forward iterator
- * or better, and false otherwise.
- */ -template struct isForwardIterator { - enum { value = boost::is_convertible< - typename std::iterator_traits::iterator_category, - std::forward_iterator_tag>::value - }; -}; +template +class fbvector { + + //=========================================================================== + //--------------------------------------------------------------------------- + // implementation + private: + typedef std::allocator_traits A; + + struct Impl : public Allocator { + // typedefs + typedef typename A::pointer pointer; + typedef typename A::size_type size_type; + + // data + pointer b_, e_, z_; + + // constructors + Impl() : Allocator(), b_(nullptr), e_(nullptr), z_(nullptr) {} + /* implicit */ Impl(const Allocator& a) + : Allocator(a), b_(nullptr), e_(nullptr), z_(nullptr) {} + /* implicit */ Impl(Allocator&& a) + : Allocator(std::move(a)), b_(nullptr), e_(nullptr), z_(nullptr) {} + + /* implicit */ Impl(size_type n, const Allocator& a = Allocator()) + : Allocator(a) + { init(n); } + + Impl(Impl&& other) noexcept + : Allocator(std::move(other)), + b_(other.b_), e_(other.e_), z_(other.z_) + { other.b_ = other.e_ = other.z_ = nullptr; } + + // destructor + ~Impl() { + destroy(); + } -/** - * Destroys all elements in the range [b, e). If the type referred to - * by the iterators has a trivial destructor, does nothing. - */ -template -void destroyRange(It b, It e) { - typedef typename boost::remove_reference::type T; - if (boost::has_trivial_destructor::value) return; - for (; b != e; ++b) { - (*b).~T(); + // allocation + // note that 'allocate' and 'deallocate' are inherited from Allocator + T* D_allocate(size_type n) { + if (usingStdAllocator::value) { + return static_cast(malloc(n * sizeof(T))); + } else { + return std::allocator_traits::allocate(*this, n); + } + } + + void D_deallocate(T* p, size_type n) noexcept { + if (usingStdAllocator::value) { + free(p); + } else { + std::allocator_traits::deallocate(*this, p, n); + } + } + + // helpers + void swapData(Impl& other) { + std::swap(b_, other.b_); + std::swap(e_, other.e_); + std::swap(z_, other.z_); + } + + // data ops + inline void destroy() noexcept { + if (b_) { + // THIS DISPATCH CODE IS DUPLICATED IN fbvector::D_destroy_range_a. + // It has been inlined here for speed. It calls the static fbvector + // methods to perform the actual destruction. + if (usingStdAllocator::value) { + S_destroy_range(b_, e_); + } else { + S_destroy_range_a(*this, b_, e_); + } + + D_deallocate(b_, size_type(z_ - b_)); + } + } + + void init(size_type n) { + if (UNLIKELY(n == 0)) { + b_ = e_ = z_ = nullptr; + } else { + size_type sz = folly::goodMallocSize(n * sizeof(T)) / sizeof(T); + b_ = D_allocate(sz); + e_ = b_; + z_ = b_ + sz; + } + } + + void set(pointer newB, size_type newSize, size_type newCap) { + z_ = newB + newCap; + e_ = newB + newSize; + b_ = newB; + } + + void reset(size_type newCap) { + destroy(); + try { + init(newCap); + } catch (...) 
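+      // (If init(newCap) throws, the handler below falls back to the empty
+      // but valid state before rethrowing, so b_/e_/z_ never dangle.)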
{ + init(0); + throw; + } + } + void reset() { // same as reset(0) + destroy(); + b_ = e_ = z_ = nullptr; + } + } impl_; + + static void swap(Impl& a, Impl& b) { + using std::swap; + if (!usingStdAllocator::value) { + swap(a, b); + } + a.swapData(b); + } + + //=========================================================================== + //--------------------------------------------------------------------------- + // types and constants + public: + typedef T value_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef T* iterator; + typedef const T* const_iterator; + typedef size_t size_type; + typedef typename std::make_signed::type difference_type; + typedef Allocator allocator_type; + typedef typename A::pointer pointer; + typedef typename A::const_pointer const_pointer; + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; + + private: + typedef std::integral_constant::value && + sizeof(T) <= 16 // don't force large structures to be passed by value + > should_pass_by_value; + typedef typename std::conditional< + should_pass_by_value::value, T, const T&>::type VT; + typedef typename std::conditional< + should_pass_by_value::value, T, T&&>::type MT; + + typedef std::integral_constant>::value> usingStdAllocator; + typedef std::integral_constant moveIsSwap; + + //=========================================================================== + //--------------------------------------------------------------------------- + // allocator helpers + private: + //--------------------------------------------------------------------------- + // allocate + + T* M_allocate(size_type n) { + return impl_.D_allocate(n); + } + + //--------------------------------------------------------------------------- + // deallocate + + void M_deallocate(T* p, size_type n) noexcept { + impl_.D_deallocate(p, n); + } + + //--------------------------------------------------------------------------- + // construct + + // GCC is very sensitive to the exact way that construct is called. For + // that reason there are several different specializations of construct. + + template + void M_construct(U* p, Args&&... args) { + if (usingStdAllocator::value) { + new (p) U(std::forward(args)...); + } else { + std::allocator_traits::construct( + impl_, p, std::forward(args)...); + } } -} -/** - * Moves the "interesting" part of value to the uninitialized memory - * at address addr, and leaves value in a destroyable state. - */ + template + static void S_construct(U* p, Args&&... args) { + new (p) U(std::forward(args)...); + } -template -typename boost::enable_if_c< - boost::has_trivial_assign::value ->::type -uninitialized_destructive_move(T& value, T* addr) { - // Just assign the thing; this is most efficient - *addr = value; -} + template + static void S_construct_a(Allocator& a, U* p, Args&&... 
args) { + std::allocator_traits::construct( + a, p, std::forward(args)...); + } -template -typename boost::enable_if_c< - !boost::has_trivial_assign::value && - boost::has_nothrow_constructor::value ->::type -uninitialized_destructive_move(T& value, T* addr) { - // Cheap default constructor - move and reinitialize - memcpy(addr, &value, sizeof(T)); - new(&value) T; -} + // scalar optimization + // TODO we can expand this optimization to: default copyable and assignable + template ::value>::type> + void M_construct(U* p, U arg) { + if (usingStdAllocator::value) { + *p = arg; + } else { + std::allocator_traits::construct(impl_, p, arg); + } + } -template -typename std::enable_if< - !boost::has_trivial_assign::value && - !boost::has_nothrow_constructor::value ->::type -uninitialized_destructive_move(T& value, T* addr) { - // User defined move construction. - - // TODO: we should probably prefer this over the above memcpy() - // version when the type has a user-defined move constructor. We - // don't right now because 4.6 doesn't implement - // std::is_move_constructible<> yet. - new (addr) T(std::move(value)); -} + template ::value>::type> + static void S_construct(U* p, U arg) { + *p = arg; + } -/** - * Fills n objects of type T starting at address b with T's default - * value. If the operation throws, destroys all objects constructed so - * far and calls free(b). - */ -template -void uninitializedFillDefaultOrFree(T * b, size_t n) { - if (boost::is_arithmetic::value || boost::is_pointer::value) { - if (n <= 16384 / sizeof(T)) { - memset(b, 0, n * sizeof(T)); + template ::value>::type> + static void S_construct_a(Allocator& a, U* p, U arg) { + std::allocator_traits::construct(a, p, arg); + } + + // const& optimization + template ::value>::type> + void M_construct(U* p, const U& value) { + if (usingStdAllocator::value) { + new (p) U(value); + } else { + std::allocator_traits::construct(impl_, p, value); + } + } + + template ::value>::type> + static void S_construct(U* p, const U& value) { + new (p) U(value); + } + + template ::value>::type> + static void S_construct_a(Allocator& a, U* p, const U& value) { + std::allocator_traits::construct(a, p, value); + } + + //--------------------------------------------------------------------------- + // destroy + + void M_destroy(T* p) noexcept { + if (usingStdAllocator::value) { + if (!std::is_trivially_destructible::value) { + p->~T(); + } + } else { + std::allocator_traits::destroy(impl_, p); + } + } + + //=========================================================================== + //--------------------------------------------------------------------------- + // algorithmic helpers + private: + //--------------------------------------------------------------------------- + // destroy_range + + // wrappers + void M_destroy_range_e(T* pos) noexcept { + D_destroy_range_a(pos, impl_.e_); + impl_.e_ = pos; + } + + // dispatch + // THIS DISPATCH CODE IS DUPLICATED IN IMPL. SEE IMPL FOR DETAILS. 
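+  // Naming convention, as used throughout this file: M_* helpers are
+  // members that may update impl_, D_* helpers dispatch on
+  // usingStdAllocator, and S_* helpers are the static leaf operations.
+  // Every D_* function has roughly this shape:
+  //
+  //   if (usingStdAllocator::value) S_operation(args...);      // optimized
+  //   else S_operation_a(impl_, args...);   // defer to allocator_traits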
+ void D_destroy_range_a(T* first, T* last) noexcept { + if (usingStdAllocator::value) { + S_destroy_range(first, last); } else { - goto duff_fill; - } - } else if (boost::has_nothrow_constructor::value) { - duff_fill: - auto i = b; - auto const e1 = b + (n & ~size_t(7)); - for (; i != e1; i += 8) { - new(i) T(); - new(i + 1) T(); - new(i + 2) T(); - new(i + 3) T(); - new(i + 4) T(); - new(i + 5) T(); - new(i + 6) T(); - new(i + 7) T(); - } - for (auto const e = b + n; i != e; ++i) { - new(i) T(); - } - } else { - // Conservative approach - auto i = b; + S_destroy_range_a(impl_, first, last); + } + } + + // allocator + static void S_destroy_range_a(Allocator& a, T* first, T* last) noexcept { + for (; first != last; ++first) { + std::allocator_traits::destroy(a, first); + } + } + + // optimized + static void S_destroy_range(T* first, T* last) noexcept { + if (!std::is_trivially_destructible::value) { + // EXPERIMENTAL DATA on fbvector> (where each vector has + // size 0). + // The unrolled version seems to work faster for small to medium sized + // fbvectors. It gets a 10% speedup on fbvectors of size 1024, 64, and + // 16. + // The simple loop version seems to work faster for large fbvectors. The + // unrolled version is about 6% slower on fbvectors on size 16384. + // The two methods seem tied for very large fbvectors. The unrolled + // version is about 0.5% slower on size 262144. + + // for (; first != last; ++first) first->~T(); + #define FOLLY_FBV_OP(p) (p)->~T() + FOLLY_FBV_UNROLL_PTR(first, last, FOLLY_FBV_OP) + #undef FOLLY_FBV_OP + } + } + + //--------------------------------------------------------------------------- + // uninitialized_fill_n + + // wrappers + void M_uninitialized_fill_n_e(size_type sz) { + D_uninitialized_fill_n_a(impl_.e_, sz); + impl_.e_ += sz; + } + + void M_uninitialized_fill_n_e(size_type sz, VT value) { + D_uninitialized_fill_n_a(impl_.e_, sz, value); + impl_.e_ += sz; + } + + // dispatch + void D_uninitialized_fill_n_a(T* dest, size_type sz) { + if (usingStdAllocator::value) { + S_uninitialized_fill_n(dest, sz); + } else { + S_uninitialized_fill_n_a(impl_, dest, sz); + } + } + + void D_uninitialized_fill_n_a(T* dest, size_type sz, VT value) { + if (usingStdAllocator::value) { + S_uninitialized_fill_n(dest, sz, value); + } else { + S_uninitialized_fill_n_a(impl_, dest, sz, value); + } + } + + // allocator + template + static void S_uninitialized_fill_n_a(Allocator& a, T* dest, + size_type sz, Args&&... args) { + auto b = dest; + auto e = dest + sz; try { - for (auto const e = b + n; i != e; ++i) { - new(i) T; + for (; b != e; ++b) { + std::allocator_traits::construct(a, b, + std::forward(args)...); } } catch (...) { - destroyRange(b, i); - free(b); + S_destroy_range_a(a, dest, b); throw; } } -} -/** - * Fills n objects of type T starting at address b with value. If the - * operation throws, destroys all objects constructed so far and calls - * free(b). 
- */ -template -void uninitializedFillOrFree(T * b, size_t n, const T& value) { - auto const e = b + n; - if (boost::has_trivial_copy::value) { - auto i = b; - auto const e1 = b + (n & ~size_t(7)); - for (; i != e1; i += 8) { - new(i) T(value); - new(i + 1) T(value); - new(i + 2) T(value); - new(i + 3) T(value); - new(i + 4) T(value); - new(i + 5) T(value); - new(i + 6) T(value); - new(i + 7) T(value); - } - for (; i != e; ++i) { - new(i) T(value); - } - } else { - // Conservative approach - auto i = b; + // optimized + static void S_uninitialized_fill_n(T* dest, size_type n) { + if (folly::IsZeroInitializable::value) { + if (LIKELY(n != 0)) { + std::memset(dest, 0, sizeof(T) * n); + } + } else { + auto b = dest; + auto e = dest + n; + try { + for (; b != e; ++b) { + S_construct(b); + } + } catch (...) { + --b; + for (; b >= dest; --b) { + b->~T(); + } + throw; + } + } + } + + static void S_uninitialized_fill_n(T* dest, size_type n, const T& value) { + auto b = dest; + auto e = dest + n; try { - for (; i != e; ++i) { - new(i) T(value); + for (; b != e; ++b) { + S_construct(b, value); } } catch (...) { - destroyRange(b, i); - free(b); + S_destroy_range(dest, b); throw; } } -} -} // namespace fbvector_detail -/** - * This is the std::vector replacement. For conformity, fbvector takes - * the same template parameters, but it doesn't use the - * allocator. Instead, it uses malloc, and when present, jemalloc's - * extensions. - */ -template -class fbvector : private boost::totally_ordered > { - bool isSane() const { - return - begin() <= end() && - empty() == (size() == 0) && - empty() == (begin() == end()) && - size() <= max_size() && - capacity() <= max_size() && - size() <= capacity() && - - // Either we have no capacity or our pointers should make sense: - ((!b_ && !e_ && !z_) || (b_ != z_ && e_ <= z_)); - } - - struct Invariant { -#ifndef NDEBUG - explicit Invariant(const fbvector& s) : s_(s) { - assert(s_.isSane()); - } - ~Invariant() { - assert(s_.isSane()); - } - private: - const fbvector& s_; -#else - explicit Invariant(const fbvector&) {} -#endif - Invariant& operator=(const Invariant&); - }; - -public: - -// types: - typedef T value_type; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef T* iterator; - typedef const T* const_iterator; - typedef size_t size_type; - typedef ssize_t difference_type; - // typedef typename allocator_traits::pointer pointer; - // typedef typename allocator_traits::const_pointer const_pointer; - typedef Allocator allocator_type; - typedef typename Allocator::pointer pointer; - typedef typename Allocator::const_pointer const_pointer; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - -// 23.3.6.1 construct/copy/destroy: - fbvector() : b_(NULL), e_(NULL), z_(NULL) {} - - explicit fbvector(const Allocator&) { - new(this) fbvector; - } - - explicit fbvector(const size_type n) { - if (n == 0) { - b_ = e_ = z_ = 0; - return; - } + //--------------------------------------------------------------------------- + // uninitialized_copy + + // it is possible to add an optimization for the case where + // It = move(T*) and IsRelocatable and Is0Initiailizable - auto const nBytes = goodMallocSize(n * sizeof(T)); - b_ = static_cast(malloc(nBytes)); - fbvector_detail::uninitializedFillDefaultOrFree(b_, n); - e_ = b_ + n; - z_ = b_ + nBytes / sizeof(T); + // wrappers + template + void M_uninitialized_copy_e(It first, It last) { + D_uninitialized_copy_a(impl_.e_, first, last); + 
impl_.e_ += std::distance(first, last); } - fbvector(const size_type n, const T& value) { - if (!n) { - b_ = e_ = z_ = 0; - return; + template + void M_uninitialized_move_e(It first, It last) { + D_uninitialized_move_a(impl_.e_, first, last); + impl_.e_ += std::distance(first, last); + } + + // dispatch + template + void D_uninitialized_copy_a(T* dest, It first, It last) { + if (usingStdAllocator::value) { + if (folly::IsTriviallyCopyable::value) { + S_uninitialized_copy_bits(dest, first, last); + } else { + S_uninitialized_copy(dest, first, last); + } + } else { + S_uninitialized_copy_a(impl_, dest, first, last); } + } - auto const nBytes = goodMallocSize(n * sizeof(T)); - b_ = static_cast(malloc(nBytes)); - fbvector_detail::uninitializedFillOrFree(b_, n, value); - e_ = b_ + n; - z_ = b_ + nBytes / sizeof(T); + template + void D_uninitialized_move_a(T* dest, It first, It last) { + D_uninitialized_copy_a(dest, + std::make_move_iterator(first), std::make_move_iterator(last)); } - fbvector(const size_type n, const T& value, const Allocator&) { - new(this) fbvector(n, value); + // allocator + template + static void + S_uninitialized_copy_a(Allocator& a, T* dest, It first, It last) { + auto b = dest; + try { + for (; first != last; ++first, ++b) { + std::allocator_traits::construct(a, b, *first); + } + } catch (...) { + S_destroy_range_a(a, dest, b); + throw; + } } - template - fbvector(InputIteratorOrNum first, InputIteratorOrNum last) { - new(this) fbvector; - assign(first, last); + // optimized + template + static void S_uninitialized_copy(T* dest, It first, It last) { + auto b = dest; + try { + for (; first != last; ++first, ++b) { + S_construct(b, *first); + } + } catch (...) { + S_destroy_range(dest, b); + throw; + } } - template - fbvector(InputIterator first, InputIterator last, - const Allocator&) { - new(this) fbvector(first, last); + static void + S_uninitialized_copy_bits(T* dest, const T* first, const T* last) { + if (last != first) { + std::memcpy((void*)dest, (void*)first, (last - first) * sizeof(T)); + } } - fbvector(const fbvector& rhs) { - new(this) fbvector(rhs.begin(), rhs.end()); + static void + S_uninitialized_copy_bits(T* dest, std::move_iterator first, + std::move_iterator last) { + T* bFirst = first.base(); + T* bLast = last.base(); + if (bLast != bFirst) { + std::memcpy((void*)dest, (void*)bFirst, (bLast - bFirst) * sizeof(T)); + } } - fbvector(const fbvector& rhs, const Allocator&) { - new(this) fbvector(rhs); + + template + static void + S_uninitialized_copy_bits(T* dest, It first, It last) { + S_uninitialized_copy(dest, first, last); } - fbvector(fbvector&& o, const Allocator& = Allocator()) - : b_(o.b_) - , e_(o.e_) - , z_(o.z_) - { - o.b_ = o.e_ = o.z_ = 0; + //--------------------------------------------------------------------------- + // copy_n + + // This function is "unsafe": it assumes that the iterator can be advanced at + // least n times. However, as a private function, that unsafety is managed + // wholly by fbvector itself. 
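+  // For instance (illustrative only): S_copy_n(dest, v.begin(), 3) is legal
+  // only if v holds at least three elements; no bounds check is made here.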
+
+  template <typename It>
+  static It S_copy_n(T* dest, It first, size_type n) {
+    auto e = dest + n;
+    for (; dest != e; ++dest, ++first) {
+      *dest = *first;
+    }
+    return first;
   }

-  fbvector(std::initializer_list<T> il, const Allocator& = Allocator()) {
-    new(this) fbvector(il.begin(), il.end());
+  static const T* S_copy_n(T* dest, const T* first, size_type n) {
+    if (folly::IsTriviallyCopyable<T>::value) {
+      std::memcpy((void*)dest, (void*)first, n * sizeof(T));
+      return first + n;
+    } else {
+      return S_copy_n<const T*>(dest, first, n);
+    }
   }

-  ~fbvector() {
-    // fbvector only works with relocatable objects. We insert this
-    // static check inside the destructor because pretty much any
-    // instantiation of fbvector will generate the destructor (and
-    // therefore refuse compilation if the assertion fails). To see
-    // how you can enable IsRelocatable for your type, refer to the
-    // definition of IsRelocatable in Traits.h.
-    BOOST_STATIC_ASSERT(IsRelocatable<T>::value);
-    if (!b_) return;
-    fbvector_detail::destroyRange(b_, e_);
-    free(b_);
+  static std::move_iterator<T*>
+  S_copy_n(T* dest, std::move_iterator<T*> mIt, size_type n) {
+    if (folly::IsTriviallyCopyable<T>::value) {
+      T* first = mIt.base();
+      std::memcpy((void*)dest, (void*)first, n * sizeof(T));
+      return std::make_move_iterator(first + n);
+    } else {
+      return S_copy_n<std::move_iterator<T*>>(dest, mIt, n);
+    }
   }

-  fbvector& operator=(const fbvector& rhs) {
-    assign(rhs.begin(), rhs.end());
-    return *this;
+
+  //===========================================================================
+  //---------------------------------------------------------------------------
+  // relocation helpers
+  private:
+
+  // Relocation is divided into three parts:
+  //
+  //  1: relocate_move
+  //     Performs the actual movement of data from point a to point b.
+  //
+  //  2: relocate_done
+  //     Destroys the old data.
+  //
+  //  3: relocate_undo
+  //     Destroys the new data and restores the old data.
+  //
+  // The three steps are used because there may be an exception after part 1
+  // has completed. If that is the case, then relocate_undo can nullify the
+  // initial move. Otherwise, relocate_done performs the last bit of tidying
+  // up.
+  //
+  // The relocation trio may use either memcpy, move, or copy. It is decided
+  // by the following case statement:
+  //
+  //  IsRelocatable && usingStdAllocator    -> memcpy
+  //  has_nothrow_move && usingStdAllocator -> move
+  //  cannot copy                           -> move
+  //  default                               -> copy
+  //
+  // If the class is non-copyable then it must be movable. However, if the
+  // move constructor is not noexcept, i.e. an error could be thrown, then
+  // relocate_undo will be unable to restore the old data, for fear of a
+  // second exception being thrown. This is a known and unavoidable
+  // deficiency. In lieu of a strong exception guarantee, relocate_undo does
+  // the next best thing: it provides a weak exception guarantee by
+  // destroying the new data, but leaving the old data in an indeterminate
+  // state. Note that that indeterminate state will be valid, since the
+  // old data has not been destroyed; it has merely been the source of a
+  // move, which is required to leave the source in a valid state.
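+  // The case statement above, as an illustrative sketch (the actual
+  // selection is made through the relocate_use_memcpy / relocate_use_move
+  // type tags defined below):
+  //
+  //   if (IsRelocatable<T> && usingStdAllocator)   memcpy the raw bytes;
+  //   else if (nothrow move || cannot copy)        move-construct each T;
+  //   else                                         copy-construct each T;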
+ + // wrappers + void M_relocate(T* newB) { + relocate_move(newB, impl_.b_, impl_.e_); + relocate_done(newB, impl_.b_, impl_.e_); + } + + // dispatch type trait + typedef std::integral_constant::value && usingStdAllocator::value + > relocate_use_memcpy; + + typedef std::integral_constant::value + && usingStdAllocator::value) + || !std::is_copy_constructible::value + > relocate_use_move; + + // move + void relocate_move(T* dest, T* first, T* last) { + relocate_move_or_memcpy(dest, first, last, relocate_use_memcpy()); + } + + void relocate_move_or_memcpy(T* dest, T* first, T* last, std::true_type) { + if (first != nullptr) { + std::memcpy((void*)dest, (void*)first, (last - first) * sizeof(T)); + } } - fbvector& operator=(fbvector&& v) { - clear(); - swap(v); - return *this; + void relocate_move_or_memcpy(T* dest, T* first, T* last, std::false_type) { + relocate_move_or_copy(dest, first, last, relocate_use_move()); } - fbvector& operator=(std::initializer_list il) { - assign(il.begin(), il.end()); - return *this; + void relocate_move_or_copy(T* dest, T* first, T* last, std::true_type) { + D_uninitialized_move_a(dest, first, last); } - bool operator==(const fbvector& rhs) const { - return size() == rhs.size() && std::equal(begin(), end(), rhs.begin()); + void relocate_move_or_copy(T* dest, T* first, T* last, std::false_type) { + D_uninitialized_copy_a(dest, first, last); } - bool operator<(const fbvector& rhs) const { - return std::lexicographical_compare(begin(), end(), - rhs.begin(), rhs.end()); + // done + void relocate_done(T* /*dest*/, T* first, T* last) noexcept { + if (folly::IsRelocatable::value && usingStdAllocator::value) { + // used memcpy; data has been relocated, do not call destructor + } else { + D_destroy_range_a(first, last); + } } -private: - template - void assignImpl(InputIterator first, InputIterator last, boost::false_type) { - // Pair of iterators - if (fbvector_detail::isForwardIterator::value) { - if (b_ <= &*first && &*first < e_) { - // Aliased assign, work on the side - fbvector result(first, last); - result.swap(*this); - return; - } + // undo + void relocate_undo(T* dest, T* first, T* last) noexcept { + if (folly::IsRelocatable::value && usingStdAllocator::value) { + // used memcpy, old data is still valid, nothing to do + } else if (std::is_nothrow_move_constructible::value && + usingStdAllocator::value) { + // noexcept move everything back, aka relocate_move + relocate_move(first, dest, dest + (last - first)); + } else if (!std::is_copy_constructible::value) { + // weak guarantee + D_destroy_range_a(dest, dest + (last - first)); + } else { + // used copy, old data is still valid + D_destroy_range_a(dest, dest + (last - first)); + } + } - auto const oldSize = size(); - auto const newSize = std::distance(first, last); - if (static_cast(oldSize) >= newSize) { - // No reallocation, nice - auto const newEnd = std::copy(first, last, b_); - fbvector_detail::destroyRange(newEnd, e_); - e_ = newEnd; - return; - } + //=========================================================================== + //--------------------------------------------------------------------------- + // construct/copy/destroy + public: + fbvector() = default; + + explicit fbvector(const Allocator& a) : impl_(a) {} + + explicit fbvector(size_type n, const Allocator& a = Allocator()) + : impl_(n, a) + { M_uninitialized_fill_n_e(n); } + + fbvector(size_type n, VT value, const Allocator& a = Allocator()) + : impl_(n, a) + { M_uninitialized_fill_n_e(n, value); } - // Must reallocate - just do it on the 
side - auto const nBytes = goodMallocSize(newSize * sizeof(T)); - auto const b = static_cast(malloc(nBytes)); - std::uninitialized_copy(first, last, b); - this->fbvector::~fbvector(); - b_ = b; - e_ = b + newSize; - z_ = b_ + nBytes / sizeof(T); + template ::iterator_category> + fbvector(It first, It last, const Allocator& a = Allocator()) + : fbvector(first, last, a, Category()) {} + + fbvector(const fbvector& other) + : impl_(other.size(), A::select_on_container_copy_construction(other.impl_)) + { M_uninitialized_copy_e(other.begin(), other.end()); } + + fbvector(fbvector&& other) noexcept : impl_(std::move(other.impl_)) {} + + fbvector(const fbvector& other, const Allocator& a) + : fbvector(other.begin(), other.end(), a) {} + + /* may throw */ fbvector(fbvector&& other, const Allocator& a) : impl_(a) { + if (impl_ == other.impl_) { + impl_.swapData(other.impl_); } else { - // Input iterator sucks - FOR_EACH (i, *this) { - if (first == last) { - fbvector_detail::destroyRange(i, e_); - e_ = i; - return; - } - *i = *first; - ++first; - } - FOR_EACH_RANGE (i, first, last) { - push_back(*i); + impl_.init(other.size()); + M_uninitialized_move_e(other.begin(), other.end()); + } + } + + fbvector(std::initializer_list il, const Allocator& a = Allocator()) + : fbvector(il.begin(), il.end(), a) {} + + ~fbvector() = default; // the cleanup occurs in impl_ + + fbvector& operator=(const fbvector& other) { + if (UNLIKELY(this == &other)) { + return *this; + } + + if (!usingStdAllocator::value && + A::propagate_on_container_copy_assignment::value) { + if (impl_ != other.impl_) { + // can't use other's different allocator to clean up self + impl_.reset(); } + (Allocator&)impl_ = (Allocator&)other.impl_; } + + assign(other.begin(), other.end()); + return *this; } - void assignImpl(const size_type newSize, const T value, boost::true_type) { - // Arithmetic type, forward back to unambiguous definition - assign(newSize, value); + fbvector& operator=(fbvector&& other) { + if (UNLIKELY(this == &other)) { + return *this; + } + moveFrom(std::move(other), moveIsSwap()); + return *this; + } + + fbvector& operator=(std::initializer_list il) { + assign(il.begin(), il.end()); + return *this; } -public: - // Classic ambiguity (and a lot of unnecessary complexity) in - // std::vector: assign(10, 20) for vector means "assign 10 - // elements all having the value 20" but is intercepted by the - // two-iterators overload assign(first, last). So we need to - // disambiguate here. There is no pretty solution. We use here - // overloading based on is_arithmetic. Method insert has the same - // issue (and the same solution in this implementation). - template - void assign(InputIteratorOrNum first, InputIteratorOrNum last) { - assignImpl(first, last, boost::is_arithmetic()); + template ::iterator_category> + void assign(It first, It last) { + assign(first, last, Category()); } - void assign(const size_type newSize, const T& value) { - if (b_ <= &value && &value < e_) { - // Need to check for aliased assign, sigh - return assign(newSize, T(value)); + void assign(size_type n, VT value) { + if (n > capacity()) { + // Not enough space. Do not reserve in place, since we will + // discard the old values anyways. 
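+      // Note: value may alias an element of this vector (e.g. in
+      // v.assign(n, v[0])), in which case it must be copied out before
+      // impl_.reset(n) destroys the storage it points into.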
+ if (dataIsInternalAndNotVT(value)) { + T copy(std::move(value)); + impl_.reset(n); + M_uninitialized_fill_n_e(n, copy); + } else { + impl_.reset(n); + M_uninitialized_fill_n_e(n, value); + } + } else if (n <= size()) { + auto newE = impl_.b_ + n; + std::fill(impl_.b_, newE, value); + M_destroy_range_e(newE); + } else { + std::fill(impl_.b_, impl_.e_, value); + M_uninitialized_fill_n_e(n - size(), value); } + } - auto const oldSize = size(); - if (oldSize >= newSize) { - // No reallocation, nice - auto const newEnd = b_ + newSize; - fbvector_detail::destroyRange(newEnd, e_); - e_ = newEnd; - return; + void assign(std::initializer_list il) { + assign(il.begin(), il.end()); + } + + allocator_type get_allocator() const noexcept { + return impl_; + } + + private: + // contract dispatch for iterator types fbvector(It first, It last) + template + fbvector(ForwardIterator first, ForwardIterator last, + const Allocator& a, std::forward_iterator_tag) + : impl_(size_type(std::distance(first, last)), a) + { M_uninitialized_copy_e(first, last); } + + template + fbvector( + InputIterator first, + InputIterator last, + const Allocator& a, + std::input_iterator_tag) + : impl_(a) { + for (; first != last; ++first) { + emplace_back(*first); } + } - // Need to reallocate - if (reserve_in_place(newSize)) { - // Careful here, fill and uninitialized_fill may throw. The - // latter is transactional, so no need to worry about a - // buffer partially filled in case of exception. - std::fill(b_, e_, value); - auto const newEnd = b_ + newSize; - std::uninitialized_fill(e_, newEnd, value); - e_ = newEnd; - return; + // contract dispatch for allocator movement in operator=(fbvector&&) + void + moveFrom(fbvector&& other, std::true_type) { + swap(impl_, other.impl_); + } + void moveFrom(fbvector&& other, std::false_type) { + if (impl_ == other.impl_) { + impl_.swapData(other.impl_); + } else { + impl_.reset(other.size()); + M_uninitialized_move_e(other.begin(), other.end()); } + } - // Cannot expand or jemalloc not present at all; must just - // allocate a new chunk and discard the old one. This is - // tantamount with creating a new fbvector altogether. This won't - // recurse infinitely; the constructor implements its own. 
- fbvector temp(newSize, value); - temp.swap(*this); + // contract dispatch for iterator types in assign(It first, It last) + template + void assign(ForwardIterator first, ForwardIterator last, + std::forward_iterator_tag) { + const auto newSize = size_type(std::distance(first, last)); + if (newSize > capacity()) { + impl_.reset(newSize); + M_uninitialized_copy_e(first, last); + } else if (newSize <= size()) { + auto newEnd = std::copy(first, last, impl_.b_); + M_destroy_range_e(newEnd); + } else { + auto mid = S_copy_n(impl_.b_, first, size()); + M_uninitialized_copy_e(mid, last); + } } - void assign(std::initializer_list il) { - assign(il.begin(), il.end()); + template + void assign(InputIterator first, InputIterator last, + std::input_iterator_tag) { + auto p = impl_.b_; + for (; first != last && p != impl_.e_; ++first, ++p) { + *p = *first; + } + if (p != impl_.e_) { + M_destroy_range_e(p); + } else { + for (; first != last; ++first) { + emplace_back(*first); + } + } } - allocator_type get_allocator() const { - // whatevs - return allocator_type(); + // contract dispatch for aliasing under VT optimization + bool dataIsInternalAndNotVT(const T& t) { + if (should_pass_by_value::value) { + return false; + } + return dataIsInternal(t); + } + bool dataIsInternal(const T& t) { + return UNLIKELY(impl_.b_ <= std::addressof(t) && + std::addressof(t) < impl_.e_); } -// iterators: - iterator begin() { - return b_; + + //=========================================================================== + //--------------------------------------------------------------------------- + // iterators + public: + iterator begin() noexcept { + return impl_.b_; } - const_iterator begin() const { - return b_; + const_iterator begin() const noexcept { + return impl_.b_; } - iterator end() { - return e_; + iterator end() noexcept { + return impl_.e_; } - const_iterator end() const { - return e_; + const_iterator end() const noexcept { + return impl_.e_; } - reverse_iterator rbegin() { + reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { + const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } - reverse_iterator rend() { + reverse_iterator rend() noexcept { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { + const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } - const_iterator cbegin() const { - return b_; + + const_iterator cbegin() const noexcept { + return impl_.b_; + } + const_iterator cend() const noexcept { + return impl_.e_; } - const_iterator cend() const { - return e_; + const_reverse_iterator crbegin() const noexcept { + return const_reverse_iterator(end()); + } + const_reverse_iterator crend() const noexcept { + return const_reverse_iterator(begin()); } -// 23.3.6.2 capacity: - size_type size() const { - return e_ - b_; + //=========================================================================== + //--------------------------------------------------------------------------- + // capacity + public: + size_type size() const noexcept { + return size_type(impl_.e_ - impl_.b_); } - size_type max_size() { + size_type max_size() const noexcept { // good luck gettin' there return ~size_type(0); } - void resize(const size_type sz) { - auto const oldSize = size(); - if (sz <= oldSize) { - auto const newEnd = b_ + sz; - fbvector_detail::destroyRange(newEnd, e_); - e_ = newEnd; + void resize(size_type n) { + if (n <= size()) { + M_destroy_range_e(impl_.b_ + 
n); } else { - // Must expand - reserve(sz); - auto newEnd = b_ + sz; - std::uninitialized_fill(e_, newEnd, T()); - e_ = newEnd; + reserve(n); + M_uninitialized_fill_n_e(n - size()); } } - void resize(const size_type sz, const T& c) { - auto const oldSize = size(); - if (sz <= oldSize) { - auto const newEnd = b_ + sz; - fbvector_detail::destroyRange(newEnd, e_); - e_ = newEnd; + void resize(size_type n, VT t) { + if (n <= size()) { + M_destroy_range_e(impl_.b_ + n); + } else if (dataIsInternalAndNotVT(t) && n > capacity()) { + T copy(t); + reserve(n); + M_uninitialized_fill_n_e(n - size(), copy); } else { - // Must expand - reserve(sz); - auto newEnd = b_ + sz; - std::uninitialized_fill(e_, newEnd, c); - e_ = newEnd; + reserve(n); + M_uninitialized_fill_n_e(n - size(), t); } } - size_type capacity() const { - return z_ - b_; + size_type capacity() const noexcept { + return size_type(impl_.z_ - impl_.b_); } - bool empty() const { - return b_ == e_; + + bool empty() const noexcept { + return impl_.b_ == impl_.e_; } -private: - bool reserve_in_place(const size_type n) { - auto const crtCapacity = capacity(); - if (n <= crtCapacity) return true; - if (!rallocm) return false; + void reserve(size_type n) { + if (n <= capacity()) { + return; + } + if (impl_.b_ && reserve_in_place(n)) { + return; + } - // using jemalloc's API. Don't forget that jemalloc can never grow - // in place blocks smaller than 4096 bytes. - auto const crtCapacityBytes = crtCapacity * sizeof(T); - if (crtCapacityBytes < jemallocMinInPlaceExpandable) return false; + auto newCap = folly::goodMallocSize(n * sizeof(T)) / sizeof(T); + auto newB = M_allocate(newCap); + try { + M_relocate(newB); + } catch (...) { + M_deallocate(newB, newCap); + throw; + } + if (impl_.b_) { + M_deallocate(impl_.b_, size_type(impl_.z_ - impl_.b_)); + } + impl_.z_ = newB + newCap; + impl_.e_ = newB + (impl_.e_ - impl_.b_); + impl_.b_ = newB; + } - auto const newCapacityBytes = goodMallocSize(n * sizeof(T)); - void* p = b_; - if (rallocm(&p, NULL, newCapacityBytes, 0, ALLOCM_NO_MOVE) - != ALLOCM_SUCCESS) { - return false; + void shrink_to_fit() noexcept { + if (empty()) { + impl_.reset(); + return; } - // Managed to expand in place, reflect that in z_ - assert(b_ == p); - z_ = b_ + newCapacityBytes / sizeof(T); - return true; - } + auto const newCapacityBytes = folly::goodMallocSize(size() * sizeof(T)); + auto const newCap = newCapacityBytes / sizeof(T); + auto const oldCap = capacity(); - void reserve_with_move(const size_type n) { - // Here we can be sure we'll need to do a full reallocation - auto const crtCapacity = capacity(); - assert(crtCapacity < n); // reserve_in_place should have taken - // care of this - auto const newCapacityBytes = goodMallocSize(n * sizeof(T)); - auto b = static_cast(malloc(newCapacityBytes)); - auto const oldSize = size(); - memcpy(b, b_, oldSize * sizeof(T)); - // Done with the old chunk. Free but don't call destructors! - free(b_); - b_ = b; - e_ = b_ + oldSize; - z_ = b_ + newCapacityBytes / sizeof(T); - // done with the old chunk - } + if (newCap >= oldCap) { + return; + } -public: - void reserve(const size_type n) { - if (reserve_in_place(n)) return; - reserve_with_move(n); + void* p = impl_.b_; + // xallocx() will shrink to precisely newCapacityBytes (which was generated + // by goodMallocSize()) if it successfully shrinks in place. 
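+    // xallocx(ptr, size, extra, flags) is jemalloc's in-place resize: it
+    // returns the allocation's new usable size, so comparing that result
+    // with newCapacityBytes detects whether the in-place shrink succeeded.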
+ if ((usingJEMalloc() && usingStdAllocator::value) && + newCapacityBytes >= folly::jemallocMinInPlaceExpandable && + xallocx(p, newCapacityBytes, 0, 0) == newCapacityBytes) { + impl_.z_ += newCap - oldCap; + } else { + T* newB; // intentionally uninitialized + try { + newB = M_allocate(newCap); + try { + M_relocate(newB); + } catch (...) { + M_deallocate(newB, newCap); + return; // swallow the error + } + } catch (...) { + return; + } + if (impl_.b_) { + M_deallocate(impl_.b_, size_type(impl_.z_ - impl_.b_)); + } + impl_.z_ = newB + newCap; + impl_.e_ = newB + (impl_.e_ - impl_.b_); + impl_.b_ = newB; + } } - void shrink_to_fit() { - if (!rallocm) return; + private: + bool reserve_in_place(size_type n) { + if (!usingStdAllocator::value || !usingJEMalloc()) { + return false; + } - // using jemalloc's API. Don't forget that jemalloc can never - // shrink in place blocks smaller than 4096 bytes. - void* p = b_; - auto const crtCapacityBytes = capacity() * sizeof(T); - auto const newCapacityBytes = goodMallocSize(size() * sizeof(T)); - if (crtCapacityBytes >= jemallocMinInPlaceExpandable && - rallocm(&p, NULL, newCapacityBytes, 0, ALLOCM_NO_MOVE) - == ALLOCM_SUCCESS) { - // Celebrate - z_ = b_ + newCapacityBytes / sizeof(T); + // jemalloc can never grow in place blocks smaller than 4096 bytes. + if ((impl_.z_ - impl_.b_) * sizeof(T) < + folly::jemallocMinInPlaceExpandable) { + return false; } + + auto const newCapacityBytes = folly::goodMallocSize(n * sizeof(T)); + void* p = impl_.b_; + if (xallocx(p, newCapacityBytes, 0, 0) == newCapacityBytes) { + impl_.z_ = impl_.b_ + newCapacityBytes / sizeof(T); + return true; + } + return false; } -// element access + //=========================================================================== + //--------------------------------------------------------------------------- + // element access + public: reference operator[](size_type n) { assert(n < size()); - return b_[n]; + return impl_.b_[n]; } const_reference operator[](size_type n) const { assert(n < size()); - return b_[n]; + return impl_.b_[n]; } const_reference at(size_type n) const { - if (n > size()) { - throw std::out_of_range("fbvector: index is greater than size."); + if (UNLIKELY(n >= size())) { + std::__throw_out_of_range("fbvector: index is greater than size."); } return (*this)[n]; } @@ -644,293 +1083,643 @@ public: } reference front() { assert(!empty()); - return *b_; + return *impl_.b_; } const_reference front() const { assert(!empty()); - return *b_; + return *impl_.b_; } reference back() { assert(!empty()); - return e_[-1]; + return impl_.e_[-1]; } const_reference back() const { assert(!empty()); - return e_[-1]; + return impl_.e_[-1]; } -// 23.3.6.3 data access - T* data() { - return b_; - } - const T* data() const { - return b_; + //=========================================================================== + //--------------------------------------------------------------------------- + // data access + public: + T* data() noexcept { + return impl_.b_; } - -private: - size_t computePushBackCapacity() const { - return empty() ? std::max(64 / sizeof(T), size_t(1)) - : capacity() < jemallocMinInPlaceExpandable ? capacity() * 2 - : (capacity() * 3) / 2; + const T* data() const noexcept { + return impl_.b_; } -public: -// 23.3.6.4 modifiers: + //=========================================================================== + //--------------------------------------------------------------------------- + // modifiers (common) + public: template - void emplace_back(Args&&... 
args) { - if (e_ == z_) { - if (!reserve_in_place(size() + 1)) { - reserve_with_move(computePushBackCapacity()); - } + void emplace_back(Args&&... args) { + if (impl_.e_ != impl_.z_) { + M_construct(impl_.e_, std::forward(args)...); + ++impl_.e_; + } else { + emplace_back_aux(std::forward(args)...); } - new (e_) T(std::forward(args)...); - ++e_; } - void push_back(T x) { - if (e_ == z_) { - if (!reserve_in_place(size() + 1)) { - reserve_with_move(computePushBackCapacity()); - } + void + push_back(const T& value) { + if (impl_.e_ != impl_.z_) { + M_construct(impl_.e_, value); + ++impl_.e_; + } else { + emplace_back_aux(value); } - fbvector_detail::uninitialized_destructive_move(x, e_); - ++e_; } -private: - bool expand() { - if (!rallocm) return false; - auto const capBytes = capacity() * sizeof(T); - if (capBytes < jemallocMinInPlaceExpandable) return false; - auto const newCapBytes = goodMallocSize(capBytes + sizeof(T)); - void * bv = b_; - if (rallocm(&bv, NULL, newCapBytes, 0, ALLOCM_NO_MOVE) != ALLOCM_SUCCESS) { - return false; + void + push_back(T&& value) { + if (impl_.e_ != impl_.z_) { + M_construct(impl_.e_, std::move(value)); + ++impl_.e_; + } else { + emplace_back_aux(std::move(value)); } - // Managed to expand in place - assert(bv == b_); // nothing moved - z_ = b_ + newCapBytes / sizeof(T); - assert(capacity() > capBytes / sizeof(T)); - return true; } -public: void pop_back() { assert(!empty()); - --e_; - if (!boost::has_trivial_destructor::value) { - e_->T::~T(); - } - } - // template - // iterator emplace(const_iterator position, Args&&... args); - - iterator insert(const_iterator position, T x) { - size_t newSize; // intentionally uninitialized - if (e_ == z_ && !reserve_in_place(newSize = size() + 1)) { - // Can't reserve in place, make a copy - auto const offset = position - cbegin(); - fbvector tmp; - tmp.reserve(newSize); - memcpy(tmp.b_, b_, offset * sizeof(T)); - fbvector_detail::uninitialized_destructive_move( - x, - tmp.b_ + offset); - memcpy(tmp.b_ + offset + 1, b_ + offset, (size() - offset) * sizeof(T)); - // Brutally reassign this to refer to tmp's guts - free(b_); - b_ = tmp.b_; - e_ = b_ + newSize; - z_ = tmp.z_; - // get rid of tmp's guts - new(&tmp) fbvector; - return begin() + offset; - } - // Here we have enough room - memmove(const_cast(&*position) + 1, - const_cast(&*position), - sizeof(T) * (e_ - position)); - fbvector_detail::uninitialized_destructive_move( - x, - const_cast(&*position)); - ++e_; - return const_cast(position); - } - - iterator insert(const_iterator position, const size_type n, const T& x) { - if (e_ + n >= z_) { - if (b_ <= &x && &x < e_) { - // Ew, aliased insert - auto copy = x; - return insert(position, n, copy); + --impl_.e_; + M_destroy(impl_.e_); + } + + void swap(fbvector& other) noexcept { + if (!usingStdAllocator::value && A::propagate_on_container_swap::value) { + swap(impl_, other.impl_); + } else { + impl_.swapData(other.impl_); + } + } + + void clear() noexcept { + M_destroy_range_e(impl_.b_); + } + + private: + // std::vector implements a similar function with a different growth + // strategy: empty() ? 1 : capacity() * 2. + // + // fbvector grows differently on two counts: + // + // (1) initial size + // Instead of growing to size 1 from empty, fbvector allocates at least + // 64 bytes. You may still use reserve to reserve a lesser amount of + // memory. + // (2) 1.5x + // For medium-sized vectors, the growth strategy is 1.5x. See the docs + // for details. + // This does not apply to very small or very large fbvectors. 
This is a + // heuristic. + // A nice addition to fbvector would be the capability of having a user- + // defined growth strategy, probably as part of the allocator. + // + + size_type computePushBackCapacity() const { + if (capacity() == 0) { + return std::max(64 / sizeof(T), size_type(1)); + } + if (capacity() < folly::jemallocMinInPlaceExpandable / sizeof(T)) { + return capacity() * 2; + } + if (capacity() > 4096 * 32 / sizeof(T)) { + return capacity() * 2; + } + return (capacity() * 3 + 1) / 2; + } + + template + void emplace_back_aux(Args&&... args); + + //=========================================================================== + //--------------------------------------------------------------------------- + // modifiers (erase) + public: + iterator erase(const_iterator position) { + return erase(position, position + 1); + } + + iterator erase(const_iterator first, const_iterator last) { + assert(isValid(first) && isValid(last)); + assert(first <= last); + if (first != last) { + if (last == end()) { + M_destroy_range_e((iterator)first); + } else { + if (folly::IsRelocatable::value && usingStdAllocator::value) { + D_destroy_range_a((iterator)first, (iterator)last); + if (last - first >= cend() - last) { + std::memcpy((void*)first, (void*)last, (cend() - last) * sizeof(T)); + } else { + std::memmove((iterator)first, last, (cend() - last) * sizeof(T)); + } + impl_.e_ -= (last - first); + } else { + std::copy(std::make_move_iterator((iterator)last), + std::make_move_iterator(end()), (iterator)first); + auto newEnd = impl_.e_ - std::distance(first, last); + M_destroy_range_e(newEnd); + } } - auto const m = position - b_; - reserve(size() + n); - position = b_ + m; - } - memmove(const_cast(position) + n, - position, - sizeof(T) * (e_ - position)); - if (boost::has_trivial_copy::value) { - std::uninitialized_fill(const_cast(position), - const_cast(position) + n, - x); + } + return (iterator)first; + } + + //=========================================================================== + //--------------------------------------------------------------------------- + // modifiers (insert) + private: // we have the private section first because it defines some macros + bool isValid(const_iterator it) { + return cbegin() <= it && it <= cend(); + } + + size_type computeInsertCapacity(size_type n) { + size_type nc = std::max(computePushBackCapacity(), size() + n); + size_type ac = folly::goodMallocSize(nc * sizeof(T)) / sizeof(T); + return ac; + } + + //--------------------------------------------------------------------------- + // + // make_window takes an fbvector, and creates an uninitialized gap (a + // window) at the given position, of the given size. The fbvector must + // have enough capacity. + // + // Explanation by picture. + // + // 123456789______ + // ^ + // make_window here of size 3 + // + // 1234___56789___ + // + // If something goes wrong and the window must be destroyed, use + // undo_window to provide a weak exception guarantee. It destroys + // the right ledge. + // + // 1234___________ + // + //--------------------------------------------------------------------------- + // + // wrap_frame takes an inverse window and relocates an fbvector around it. + // The fbvector must have at least as many elements as the left ledge. + // + // Explanation by picture. 
+ // + // START + // fbvector: inverse window: + // 123456789______ _____abcde_______ + // [idx][ n ] + // + // RESULT + // _______________ 12345abcde6789___ + // + //--------------------------------------------------------------------------- + // + // insert_use_fresh_memory returns true iff the fbvector should use a fresh + // block of memory for the insertion. If the fbvector does not have enough + // spare capacity, then it must return true. Otherwise either true or false + // may be returned. + // + //--------------------------------------------------------------------------- + // + // These three functions, make_window, wrap_frame, and + // insert_use_fresh_memory, can be combined into a uniform interface. + // Since that interface involves a lot of case-work, it is built into + // some macros: FOLLY_FBVECTOR_INSERT_(PRE|START|TRY|END) + // Macros are used in an attempt to let GCC perform better optimizations, + // especially control flow optimization. + // + + //--------------------------------------------------------------------------- + // window + + void make_window(iterator position, size_type n) { + // The result is guaranteed to be non-negative, so use an unsigned type: + size_type tail = size_type(std::distance(position, impl_.e_)); + + if (tail <= n) { + relocate_move(position + n, position, impl_.e_); + relocate_done(position + n, position, impl_.e_); + impl_.e_ += n; } else { - try { - std::uninitialized_fill(const_cast(position), - const_cast(position) + n, - x); - } catch (...) { - // Oops, put things back where they were - memmove(const_cast(position), - position + n, - sizeof(T) * (e_ - position)); - throw; + if (folly::IsRelocatable::value && usingStdAllocator::value) { + std::memmove(position + n, position, tail * sizeof(T)); + impl_.e_ += n; + } else { + D_uninitialized_move_a(impl_.e_, impl_.e_ - n, impl_.e_); + try { + std::copy_backward(std::make_move_iterator(position), + std::make_move_iterator(impl_.e_ - n), impl_.e_); + } catch (...) { + D_destroy_range_a(impl_.e_ - n, impl_.e_ + n); + impl_.e_ -= n; + throw; + } + impl_.e_ += n; + D_destroy_range_a(position, position + n); } } - e_ += n; - return const_cast(position); } -private: - template - iterator insertImpl(const_iterator position, - InputIterator first, InputIterator last, - boost::false_type) { - // Pair of iterators - if (fbvector_detail::isForwardIterator::value) { - // Can compute distance - auto const n = std::distance(first, last); - if (e_ + n >= z_) { - if (b_ <= &*first && &*first < e_) { - // Ew, aliased insert - goto conservative; + void undo_window(iterator position, size_type n) noexcept { + D_destroy_range_a(position + n, impl_.e_); + impl_.e_ = position; + } + + //--------------------------------------------------------------------------- + // frame + + void wrap_frame(T* ledge, size_type idx, size_type n) { + assert(size() >= idx); + assert(n != 0); + + relocate_move(ledge, impl_.b_, impl_.b_ + idx); + try { + relocate_move(ledge + idx + n, impl_.b_ + idx, impl_.e_); + } catch (...) { + relocate_undo(ledge, impl_.b_, impl_.b_ + idx); + throw; + } + relocate_done(ledge, impl_.b_, impl_.b_ + idx); + relocate_done(ledge + idx + n, impl_.b_ + idx, impl_.e_); + } + + //--------------------------------------------------------------------------- + // use fresh? 
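+  // Taken together, an insertion follows one of two routes (a sketch of
+  // do_real_insert below):
+  //
+  //   if (insert_use_fresh(at_end, n)) {    // fresh buffer:
+  //     b = M_allocate(newCap);             //   construct the n elements,
+  //     construct elements at b + idx;      //   then wrap_frame relocates
+  //     wrap_frame(b, idx, n);              //   the old data around them
+  //   } else {                              // in place:
+  //     make_window(position, n);           //   open a gap and construct
+  //     construct elements in the window;   //   directly into it
+  //   }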
+ + bool insert_use_fresh(bool at_end, size_type n) { + if (at_end) { + if (size() + n <= capacity()) { + return false; + } + if (reserve_in_place(size() + n)) { + return false; + } + return true; + } + + if (size() + n > capacity()) { + return true; + } + + return false; + } + + //--------------------------------------------------------------------------- + // interface + + template < + typename IsInternalFunc, + typename InsertInternalFunc, + typename ConstructFunc, + typename DestroyFunc> + iterator do_real_insert( + const_iterator cpos, + size_type n, + IsInternalFunc&& isInternalFunc, + InsertInternalFunc&& insertInternalFunc, + ConstructFunc&& constructFunc, + DestroyFunc&& destroyFunc) { + if (n == 0) { + return iterator(cpos); + } + bool at_end = cpos == cend(); + bool fresh = insert_use_fresh(at_end, n); + if (!at_end) { + if (!fresh && isInternalFunc()) { + // check for internal data (technically not required by the standard) + return insertInternalFunc(); + } + assert(isValid(cpos)); + } + T* position = const_cast(cpos); + size_type idx = size_type(std::distance(impl_.b_, position)); + T* b; + size_type newCap; /* intentionally uninitialized */ + + if (fresh) { + newCap = computeInsertCapacity(n); + b = M_allocate(newCap); + } else { + if (!at_end) { + make_window(position, n); + } else { + impl_.e_ += n; + } + b = impl_.b_; + } + + T* start = b + idx; + try { + // construct the inserted elements + constructFunc(start); + } catch (...) { + if (fresh) { + M_deallocate(b, newCap); + } else { + if (!at_end) { + undo_window(position, n); + } else { + impl_.e_ -= n; } - auto const m = position - b_; - reserve(size() + n); - position = b_ + m; } - memmove(const_cast(position) + n, - position, - sizeof(T) * (e_ - position)); + throw; + } + + if (fresh) { try { - std::uninitialized_copy(first, last, - const_cast(position)); + wrap_frame(b, idx, n); } catch (...) { - // Oops, put things back where they were - memmove(const_cast(position), - position + n, - sizeof(T) * (e_ - position)); + // delete the inserted elements (exception has been thrown) + destroyFunc(start); + M_deallocate(b, newCap); throw; } - e_ += n; - return const_cast(position); - } else { - // Cannot compute distance, crappy approach - // TODO: OPTIMIZE - conservative: - fbvector result(cbegin(), position); - auto const offset = result.size(); - FOR_EACH_RANGE (i, first, last) { - result.push_back(*i); + if (impl_.b_) { + M_deallocate(impl_.b_, capacity()); } - result.insert(result.end(), position, cend()); - result.swap(*this); - return begin() + offset; + impl_.set(b, size() + n, newCap); + return impl_.b_ + idx; + } else { + return position; } } - iterator insertImpl(const_iterator position, - const size_type count, const T value, boost::true_type) { - // Forward back to unambiguous function - return insert(position, count, value); + public: + template + iterator emplace(const_iterator cpos, Args&&... 
+
+  public:
+
+  template <class... Args>
+  iterator emplace(const_iterator cpos, Args&&... args) {
+    return do_real_insert(
+        cpos,
+        1,
+        [&] { return false; },
+        [&] { return iterator{}; },
+        [&](iterator start) {
+          M_construct(start, std::forward<Args>(args)...);
+        },
+        [&](iterator start) { M_destroy(start); });
+  }
+
+  iterator insert(const_iterator cpos, const T& value) {
+    return do_real_insert(
+        cpos,
+        1,
+        [&] { return dataIsInternal(value); },
+        [&] { return insert(cpos, T(value)); },
+        [&](iterator start) { M_construct(start, value); },
+        [&](iterator start) { M_destroy(start); });
+  }
+
+  iterator insert(const_iterator cpos, T&& value) {
+    return do_real_insert(
+        cpos,
+        1,
+        [&] { return dataIsInternal(value); },
+        [&] { return insert(cpos, T(std::move(value))); },
+        [&](iterator start) { M_construct(start, std::move(value)); },
+        [&](iterator start) { M_destroy(start); });
+  }
+
+  iterator insert(const_iterator cpos, size_type n, VT value) {
+    return do_real_insert(
+        cpos,
+        n,
+        [&] { return dataIsInternalAndNotVT(value); },
+        [&] { return insert(cpos, n, T(value)); },
+        [&](iterator start) { D_uninitialized_fill_n_a(start, n, value); },
+        [&](iterator start) { D_destroy_range_a(start, start + n); });
+  }
+
+  template <class It,
+            class Category =
+                typename std::iterator_traits<It>::iterator_category>
+  iterator insert(const_iterator cpos, It first, It last) {
+    return insert(cpos, first, last, Category());
+  }
+
+  iterator insert(const_iterator cpos, std::initializer_list<T> il) {
+    return insert(cpos, il.begin(), il.end());
+  }
+
+  //---------------------------------------------------------------------------
+  // insert dispatch for iterator types
+
+  private:
+  template <class FIt>
+  iterator insert(const_iterator cpos, FIt first, FIt last,
+                  std::forward_iterator_tag) {
+    size_type n = size_type(std::distance(first, last));
+    return do_real_insert(
+        cpos,
+        n,
+        [&] { return false; },
+        [&] { return iterator{}; },
+        [&](iterator start) { D_uninitialized_copy_a(start, first, last); },
+        [&](iterator start) { D_destroy_range_a(start, start + n); });
+  }
+
+  template <class IIt>
+  iterator insert(const_iterator cpos, IIt first, IIt last,
+                  std::input_iterator_tag) {
+    T* position = const_cast<T*>(cpos);
+    assert(isValid(position));
+    size_type idx = std::distance(begin(), position);
+
+    fbvector storage(std::make_move_iterator(position),
+                     std::make_move_iterator(end()),
+                     A::select_on_container_copy_construction(impl_));
+    M_destroy_range_e(position);
+    for (; first != last; ++first) {
+      emplace_back(*first);
+    }
+    insert(cend(), std::make_move_iterator(storage.begin()),
+           std::make_move_iterator(storage.end()));
+    return impl_.b_ + idx;
+  }
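// --- Editor's sketch (not part of the patch) --------------------------------
// Usage of the input-iterator dispatch above: std::istream_iterator is a pure
// input iterator, so the distance cannot be computed up front. fbvector moves
// the tail into a temporary, appends the new elements one by one, then splices
// the tail back (the std::input_iterator_tag overload).

#include <folly/FBVector.h>
#include <iterator>
#include <sstream>

int main() {
  folly::fbvector<int> v{1, 2, 6, 7};
  std::istringstream in("3 4 5");
  v.insert(v.begin() + 2,
           std::istream_iterator<int>(in),
           std::istream_iterator<int>());
  // v now holds 1 2 3 4 5 6 7
}
// -----------------------------------------------------------------------------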
-
-public:
-  template <class InputIteratorOrNum>
-  iterator insert(const_iterator position, InputIteratorOrNum first,
-                  InputIteratorOrNum last) {
-    return insertImpl(position, first, last,
-                      boost::is_arithmetic<InputIteratorOrNum>());
-  }
-
-  iterator insert(const_iterator position, std::initializer_list<T> il) {
-    return insert(position, il.begin(), il.end());
-  }
-
-  iterator erase(const_iterator position) {
-    if (position == e_) return e_;
-    auto p = const_cast<T*>(position);
-    (*p).T::~T();
-    memmove(p, p + 1, sizeof(T) * (e_ - p - 1));
-    --e_;
-    return p;
-  }
-
-  iterator erase(const_iterator first, const_iterator last) {
-    assert(first <= last);
-    auto p1 = const_cast<T*>(first);
-    auto p2 = const_cast<T*>(last);
-    fbvector_detail::destroyRange(p1, p2);
-    memmove(p1, last, sizeof(T) * (e_ - last));
-    e_ -= last - first;
-    return p1;
-  }
-
-  void swap(fbvector& rhs) {
-    std::swap(b_, rhs.b_);
-    std::swap(e_, rhs.e_);
-    std::swap(z_, rhs.z_);
-  }
-
-  void clear() {
-    fbvector_detail::destroyRange(b_, e_);
-    e_ = b_;
-  }
-
-private:
-  // Data
-  T *b_, *e_, *z_;
+
+  //===========================================================================
+  //---------------------------------------------------------------------------
+  // lexicographical functions
+  public:
+  bool operator==(const fbvector& other) const {
+    return size() == other.size() && std::equal(begin(), end(), other.begin());
+  }
+
+  bool operator!=(const fbvector& other) const {
+    return !(*this == other);
+  }
+
+  bool operator<(const fbvector& other) const {
+    return std::lexicographical_compare(
+        begin(), end(), other.begin(), other.end());
+  }
+
+  bool operator>(const fbvector& other) const {
+    return other < *this;
+  }
+
+  bool operator<=(const fbvector& other) const {
+    return !(*this > other);
+  }
+
+  bool operator>=(const fbvector& other) const {
+    return !(*this < other);
+  }
+
+  //===========================================================================
+  //---------------------------------------------------------------------------
+  // friends
+  private:
+  template <class _T, class _A>
+  friend _T* relinquish(fbvector<_T, _A>&);
+
+  template <class _T, class _A>
+  friend void attach(fbvector<_T, _A>&, _T* data, size_t sz, size_t cap);
+
+}; // class fbvector
+
+//=============================================================================
+//-----------------------------------------------------------------------------
+// outlined functions (gcc, you finicky compiler you)
+
+template <class T, class Allocator>
+template <class... Args>
+void fbvector<T, Allocator>::emplace_back_aux(Args&&... args) {
+  size_type byte_sz =
+      folly::goodMallocSize(computePushBackCapacity() * sizeof(T));
+  if (usingStdAllocator::value && usingJEMalloc() &&
+      ((impl_.z_ - impl_.b_) * sizeof(T) >=
+       folly::jemallocMinInPlaceExpandable)) {
+    // Try to expand in place.
+    // Ask xallocx to grow the allocation in place to at least
+    //  (size() + 1) * sizeof(T) bytes and at most byte_sz bytes. xallocx
+    //  will allocate as much as possible within that range, which is the
+    //  best possible outcome: if byte_sz space is available, take it all,
+    //  otherwise take as much as possible. If nothing is available, then
+    //  fail. In this fashion, we never relocate if there is a possibility
+    //  of expanding in place, and we never reallocate by less than the
+    //  desired amount unless we cannot expand further. Hence we will not
+    //  reallocate sub-optimally twice in a row (modulo the blocking memory
+    //  being freed).
+    size_type lower = folly::goodMallocSize(sizeof(T) + size() * sizeof(T));
+    size_type upper = byte_sz;
+    size_type extra = upper - lower;
+
+    void* p = impl_.b_;
+    size_t actual;
+
+    if ((actual = xallocx(p, lower, extra, 0)) >= lower) {
+      impl_.z_ = impl_.b_ + actual / sizeof(T);
+      M_construct(impl_.e_, std::forward<Args>(args)...);
+      ++impl_.e_;
+      return;
+    }
+  }
+
+  // In-place expansion failed (or was not attempted): allocate a fresh
+  //  buffer and relocate into it.
+  size_type sz = byte_sz / sizeof(T);
+  auto newB = M_allocate(sz);
+  auto newE = newB + size();
+  try {
+    if (folly::IsRelocatable<T>::value && usingStdAllocator::value) {
+      // For linear memory access, relocate before construction.
+      // By the test condition, relocate is noexcept.
+      // Note that there is no cleanup to do if M_construct throws - that's
+      //  one of the beauties of relocation.
+      // Benchmarks for this code have high variance, and seem to be close.
+      relocate_move(newB, impl_.b_, impl_.e_);
+      M_construct(newE, std::forward<Args>(args)...);
+      ++newE;
+    } else {
+      M_construct(newE, std::forward<Args>(args)...);
+      ++newE;
+      try {
+        M_relocate(newB);
+      } catch (...) {
+        M_destroy(newE - 1);
+        throw;
+      }
+    }
+  } catch (...) {
+    M_deallocate(newB, sz);
+    throw;
+  }
+  if (impl_.b_) {
+    // size() == capacity() here (this slow path only runs when the vector
+    //  is full), so this releases the entire old buffer.
+    M_deallocate(impl_.b_, size());
+  }
+  impl_.b_ = newB;
+  impl_.e_ = newE;
+  impl_.z_ = newB + sz;
+}
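// --- Editor's sketch (not part of the patch) --------------------------------
// The xallocx protocol used above, in isolation: xallocx(p, size, extra, 0)
// tries to grow the allocation at p *in place* to anywhere in
// [size, size + extra] bytes and returns the resulting usable size, which is
// >= size on success and the unchanged old size on failure. This sketch
// assumes a build that links jemalloc, so folly::usingJEMalloc() is true and
// the weak xallocx symbol is bound.

#include <folly/Malloc.h>
#include <cstdio>
#include <cstdlib>

int main() {
  if (!folly::usingJEMalloc()) {
    return 0;  // xallocx is only meaningful under jemalloc
  }
  void* p = malloc(4096);
  // Try to grow to at least 4097 and at most 8192 bytes without moving p.
  size_t actual = xallocx(p, 4097, 8192 - 4097, 0);
  if (actual >= 4097) {
    std::printf("expanded in place to %zu bytes\n", actual);
  }
  free(p);
  return 0;
}
// -----------------------------------------------------------------------------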
+
+//=============================================================================
+//-----------------------------------------------------------------------------
+// specialized functions
+
-template <class T, class Allocator>
-bool operator!=(const fbvector<T, Allocator>& lhs,
-                const fbvector<T, Allocator>& rhs) {
-  return !(lhs == rhs);
-}
-
-template <class T, class Allocator>
-void swap(fbvector<T, Allocator>& lhs, fbvector<T, Allocator>& rhs) {
-  lhs.swap(rhs);
-}
+template <class T, class A>
+void swap(fbvector<T, A>& lhs, fbvector<T, A>& rhs) noexcept {
+  lhs.swap(rhs);
+}
+
+//=============================================================================
+//-----------------------------------------------------------------------------
+// other
+
+namespace detail {
+
+// Format support.
+template <class T, class A>
+struct IndexableTraits<fbvector<T, A>>
+    : public IndexableTraitsSeq<fbvector<T, A>> {};
+
+} // namespace detail
+
-/**
- * Resizes *v to exactly n elements. May reallocate the vector to a
- * smaller buffer if too much space will be left unused.
- */
-template <class T, class A>
-static void compactResize(folly::fbvector<T, A> * v, size_t size) {
-  auto const oldCap = v->capacity();
-  if (oldCap > size + 1024 && size < oldCap * 0.3) {
-    // Too much slack memory, reallocate a smaller buffer
-    auto const oldSize = v->size();
-    if (size <= oldSize) {
-      // Shrink
-      folly::fbvector<T, A>(v->begin(), v->begin() + size).swap(*v);
-    } else {
-      // Expand
-      folly::fbvector<T, A> temp;
-      temp.reserve(size);
-      copy(v->begin(), v->end(), back_inserter(temp));
-      temp.resize(size);
-      temp.swap(*v);
-    }
-  } else {
-    // Nolo contendere
-    v->resize(size);
-  }
-}
+template <class T, class A>
+void compactResize(fbvector<T, A>* v, size_t sz) {
+  v->resize(sz);
+  v->shrink_to_fit();
+}
+
+// DANGER
+//
+// relinquish and attach are deliberately not member functions, so that
+//  calling them is awkward. It is very easy to shoot yourself in the foot
+//  with these functions.
+//
+// If you call relinquish, then it is your responsibility to free the data
+//  and the storage, both of which may have been generated in a non-standard
+//  way through the fbvector's allocator.
+//
+// If you call attach, it is your responsibility to ensure that the fbvector
+//  is fresh (size and capacity both zero), and that the supplied data is
+//  capable of being manipulated by the allocator.
+// It is acceptable to supply a stack pointer IF:
+//  (1) The vector's data does not outlive the stack pointer. This includes
+//      extension of the data's life through a move operation.
+//  (2) The pointer has enough capacity that the vector will never be
+//      relocated.
+//  (3) Insert is not called on the vector; the insert functions have leeway
+//      to relocate the vector even if there is enough capacity.
+//  (4) A stack pointer is compatible with the fbvector's allocator.
+//
+
+template <class T, class A>
+T* relinquish(fbvector<T, A>& v) {
+  T* ret = v.data();
+  v.impl_.b_ = v.impl_.e_ = v.impl_.z_ = nullptr;
+  return ret;
+}
+
+template <class T, class A>
+void attach(fbvector<T, A>& v, T* data, size_t sz, size_t cap) {
+  assert(v.data() == nullptr);
+  v.impl_.b_ = data;
+  v.impl_.e_ = data + sz;
+  v.impl_.z_ = data + cap;
+}
+
 } // namespace folly
-
-#endif // FOLLY_FBVECTOR_H_
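// --- Editor's sketch (not part of the patch) --------------------------------
// The intended pairing of relinquish/attach above: take ownership of the heap
// buffer, use it directly, then either free it yourself or attach it to a
// fresh (size and capacity zero) fbvector. With the default allocator the
// buffer came from malloc/jemalloc, so free() would be the correct disposal;
// here the adopting vector's destructor releases it instead.

#include <folly/FBVector.h>
#include <cassert>
#include <cstddef>

int main() {
  folly::fbvector<int> v{1, 2, 3};
  std::size_t sz = v.size();
  std::size_t cap = v.capacity();

  int* raw = folly::relinquish(v);  // v is now empty; we own raw
  assert(v.data() == nullptr);
  raw[0] = 42;                      // use the buffer directly

  folly::fbvector<int> w;           // fresh: size and capacity both zero
  folly::attach(w, raw, sz, cap);   // w adopts the buffer; no copy is made
  assert(w.size() == 3 && w[0] == 42);
  return 0;
}                                   // w's destructor frees the buffer
// -----------------------------------------------------------------------------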