From dbd70eed51804682cf2d6875bcbe580771688588 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 9 Sep 2016 16:19:15 -0700 Subject: [PATCH] Refactor basic_fbstring Summary: Move fbstring_core and basic_fbstring member functions from in-situ to out-of-class inlines and refactor for readability. Remove superfluous dependence on scope_guard. Reviewed By: ot, luciang Differential Revision: D3795250 fbshipit-source-id: f6edca25d4771181faff9e0a4339bbaffd71a370 --- folly/FBString.h | 1791 +++++++++++++++++++++++++++------------------- 1 file changed, 1067 insertions(+), 724 deletions(-) diff --git a/folly/FBString.h b/folly/FBString.h index d02d12e2..7a9d6ede 100644 --- a/folly/FBString.h +++ b/folly/FBString.h @@ -48,15 +48,15 @@ #include #endif -#include -#include -#include #include +#include +#include +#include +#include -#include -#include #include -#include +#include +#include #if FOLLY_HAVE_DEPRECATED_ASSOC #ifdef _GLIBCXX_SYMVER @@ -122,18 +122,18 @@ namespace folly { namespace fbstring_detail { template -inline -OutIt copy_n(InIt b, - typename std::iterator_traits::difference_type n, - OutIt d) { +inline std::pair copy_n( + InIt b, + typename std::iterator_traits::difference_type n, + OutIt d) { for (; n != 0; --n, ++b, ++d) { *d = *b; } - return d; + return std::make_pair(b, d); } template -inline void pod_fill(Pod* b, Pod* e, T c) { +inline void podFill(Pod* b, Pod* e, T c) { assert(b && e && b <= e); /*static*/ if (sizeof(T) == 1) { memset(b, c, e - b); @@ -165,7 +165,7 @@ inline void pod_fill(Pod* b, Pod* e, T c) { * adaptation outside). */ template -inline void pod_copy(const Pod* b, const Pod* e, Pod* d) { +inline void podCopy(const Pod* b, const Pod* e, Pod* d) { assert(e >= b); assert(d >= e || d + (e - b) <= b); memcpy(d, b, (e - b) * sizeof(Pod)); @@ -176,11 +176,31 @@ inline void pod_copy(const Pod* b, const Pod* e, Pod* d) { * some asserts */ template -inline void pod_move(const Pod* b, const Pod* e, Pod* d) { +inline void podMove(const Pod* b, const Pod* e, Pod* d) { assert(e >= b); memmove(d, b, (e - b) * sizeof(*b)); } +// always inline +#if defined(__GNUC__) // Clang also defines __GNUC__ +# define FBSTRING_ALWAYS_INLINE inline __attribute__((__always_inline__)) +#elif defined(_MSC_VER) +# define FBSTRING_ALWAYS_INLINE __forceinline +#else +# define FBSTRING_ALWAYS_INLINE inline +#endif + +[[noreturn]] FBSTRING_ALWAYS_INLINE void assume_unreachable() { +#if defined(__GNUC__) // Clang also defines __GNUC__ + __builtin_unreachable(); +#elif defined(_MSC_VER) + __assume(0); +#else + // Well, it's better than nothing. + std::abort(); +#endif +} + } // namespace fbstring_detail /** @@ -216,7 +236,7 @@ public: // e.g. reference-counted implementation to fork the data. The // pointer is guaranteed to be valid until the next call to a // non-const member function. - Char * mutable_data(); + Char* mutableData(); // Returns a pointer to string's buffer and guarantees that a // readable '\0' lies right after the buffer. The pointer is // guaranteed to be valid until the next call to a non-const member @@ -234,7 +254,7 @@ public: // If expGrowth is true, exponential growth is guaranteed. // It is not guaranteed not to reallocate even if size() + delta < // capacity(), so all references to the buffer are invalidated. - Char* expand_noinit(size_t delta, bool expGrowth); + Char* expandNoinit(size_t delta, bool expGrowth); // Expands the string by one character and sets the last character // to c. void push_back(Char c); @@ -302,39 +322,18 @@ public: fbstring_core(const fbstring_core & rhs) { assert(&rhs != this); - // Simplest case first: small strings are bitblitted - if (rhs.category() == Category::isSmall) { - static_assert(offsetof(MediumLarge, data_) == 0, - "fbstring layout failure"); - static_assert(offsetof(MediumLarge, size_) == sizeof(ml_.data_), - "fbstring layout failure"); - static_assert(offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_), - "fbstring layout failure"); - // Just write the whole thing, don't look at details. In - // particular we need to copy capacity anyway because we want - // to set the size (don't forget that the last character, - // which stores a short string's length, is shared with the - // ml_.capacity field). - ml_ = rhs.ml_; - assert(category() == Category::isSmall && this->size() == rhs.size()); - } else if (rhs.category() == Category::isLarge) { - // Large strings are just refcounted - ml_ = rhs.ml_; - RefCounted::incrementRefs(ml_.data_); - assert(category() == Category::isLarge && size() == rhs.size()); - } else { - // Medium strings are copied eagerly. Don't forget to allocate - // one extra Char for the null terminator. - auto const allocSize = - goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char)); - ml_.data_ = static_cast(checkedMalloc(allocSize)); - // Also copies terminator. - fbstring_detail::pod_copy(rhs.ml_.data_, - rhs.ml_.data_ + rhs.ml_.size_ + 1, - ml_.data_); - ml_.size_ = rhs.ml_.size_; - ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); - assert(category() == Category::isMedium); + switch (rhs.category()) { + case Category::isSmall: + copySmall(rhs); + break; + case Category::isMedium: + copyMedium(rhs); + break; + case Category::isLarge: + copyLarge(rhs); + break; + default: + fbstring_detail::assume_unreachable(); } assert(size() == rhs.size()); assert(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0); @@ -350,72 +349,19 @@ public: fbstring_core(const Char *const data, const size_t size, bool disableSSO = FBSTRING_DISABLE_SSO) { + if (!disableSSO && size <= maxSmallSize) { + initSmall(data, size); + } else if (size <= maxMediumSize) { + initMedium(data, size); + } else { + initLarge(data, size); + } #ifndef NDEBUG #ifndef _LIBSTDCXX_FBSTRING - SCOPE_EXIT { - assert(this->size() == size); - assert(size == 0 || memcmp(this->data(), data, size * sizeof(Char)) == 0); - }; + assert(this->size() == size); + assert(size == 0 || memcmp(this->data(), data, size * sizeof(Char)) == 0); #endif #endif - - // Simplest case first: small strings are bitblitted - if (!disableSSO && size <= maxSmallSize) { - // Layout is: Char* data_, size_t size_, size_t capacity_ - static_assert(sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t), - "fbstring has unexpected size"); - static_assert(sizeof(Char*) == sizeof(size_t), - "fbstring size assumption violation"); - // sizeof(size_t) must be a power of 2 - static_assert((sizeof(size_t) & (sizeof(size_t) - 1)) == 0, - "fbstring size assumption violation"); - - // If data is aligned, use fast word-wise copying. Otherwise, - // use conservative memcpy. - // The word-wise path reads bytes which are outside the range of - // the string, and makes ASan unhappy, so we disable it when - // compiling with ASan. -#ifndef FBSTRING_SANITIZE_ADDRESS - if ((reinterpret_cast(data) & (sizeof(size_t) - 1)) == 0) { - const size_t byteSize = size * sizeof(Char); - constexpr size_t wordWidth = sizeof(size_t); - switch ((byteSize + wordWidth - 1) / wordWidth) { // Number of words. - case 3: - ml_.capacity_ = reinterpret_cast(data)[2]; - case 2: - ml_.size_ = reinterpret_cast(data)[1]; - case 1: - ml_.data_ = *reinterpret_cast(const_cast(data)); - case 0: - break; - } - } else -#endif - { - if (size != 0) { - fbstring_detail::pod_copy(data, data + size, small_); - } - } - setSmallSize(size); - } else { - if (size <= maxMediumSize) { - // Medium strings are allocated normally. Don't forget to - // allocate one extra Char for the terminating null. - auto const allocSize = goodMallocSize((1 + size) * sizeof(Char)); - ml_.data_ = static_cast(checkedMalloc(allocSize)); - fbstring_detail::pod_copy(data, data + size, ml_.data_); - ml_.size_ = size; - ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); - } else { - // Large strings are allocated differently - size_t effectiveCapacity = size; - auto const newRC = RefCounted::create(data, & effectiveCapacity); - ml_.data_ = newRC->data_; - ml_.size_ = size; - ml_.setCapacity(effectiveCapacity, Category::isLarge); - } - ml_.data_[size] = '\0'; - } } ~fbstring_core() noexcept { @@ -471,26 +417,16 @@ public: return c_str(); } - Char * mutable_data() { - auto const c = category(); - if (c == Category::isSmall) { + Char* mutableData() { + switch (category()) { + case Category::isSmall: return small_; + case Category::isMedium: + return ml_.data_; + case Category::isLarge: + return mutableDataLarge(); } - assert(c == Category::isMedium || c == Category::isLarge); - if (c == Category::isLarge && RefCounted::refs(ml_.data_) > 1) { - // Ensure unique. - size_t effectiveCapacity = ml_.capacity(); - auto const newRC = RefCounted::create(& effectiveCapacity); - // If this fails, someone placed the wrong capacity in an - // fbstring. - assert(effectiveCapacity >= ml_.capacity()); - // Also copies terminator. - fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, - newRC->data_); - RefCounted::decrementRefs(ml_.data_); - ml_.data_ = newRC->data_; - } - return ml_.data_; + fbstring_detail::assume_unreachable(); } const Char * c_str() const { @@ -506,152 +442,39 @@ public: void shrink(const size_t delta) { if (category() == Category::isSmall) { - // Check for underflow - assert(delta <= smallSize()); - setSmallSize(smallSize() - delta); + shrinkSmall(delta); } else if (category() == Category::isMedium || RefCounted::refs(ml_.data_) == 1) { - // Medium strings and unique large strings need no special - // handling. - assert(ml_.size_ >= delta); - ml_.size_ -= delta; - ml_.data_[ml_.size_] = '\0'; + shrinkMedium(delta); } else { - assert(ml_.size_ >= delta); - // Shared large string, must make unique. This is because of the - // durn terminator must be written, which may trample the shared - // data. - if (delta) { - fbstring_core(ml_.data_, ml_.size_ - delta).swap(*this); - } - // No need to write the terminator. + shrinkLarge(delta); } } void reserve(size_t minCapacity, bool disableSSO = FBSTRING_DISABLE_SSO) { - if (category() == Category::isLarge) { - // Ensure unique - if (RefCounted::refs(ml_.data_) > 1) { - // We must make it unique regardless; in-place reallocation is - // useless if the string is shared. In order to not surprise - // people, reserve the new block at current capacity or - // more. That way, a string's capacity never shrinks after a - // call to reserve. - minCapacity = std::max(minCapacity, ml_.capacity()); - auto const newRC = RefCounted::create(& minCapacity); - // Also copies terminator. - fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, - newRC->data_); - RefCounted::decrementRefs(ml_.data_); - ml_.data_ = newRC->data_; - ml_.setCapacity(minCapacity, Category::isLarge); - // size remains unchanged - } else { - // String is not shared, so let's try to realloc (if needed) - if (minCapacity > ml_.capacity()) { - // Asking for more memory - auto const newRC = - RefCounted::reallocate(ml_.data_, ml_.size_, - ml_.capacity(), minCapacity); - ml_.data_ = newRC->data_; - ml_.setCapacity(minCapacity, Category::isLarge); - } - assert(capacity() >= minCapacity); - } - } else if (category() == Category::isMedium) { - // String is not shared - if (minCapacity <= ml_.capacity()) { - return; // nothing to do, there's enough room - } - if (minCapacity <= maxMediumSize) { - // Keep the string at medium size. Don't forget to allocate - // one extra Char for the terminating null. - size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); - // Also copies terminator. - ml_.data_ = static_cast( - smartRealloc( - ml_.data_, - (ml_.size_ + 1) * sizeof(Char), - (ml_.capacity() + 1) * sizeof(Char), - capacityBytes)); - ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium); - } else { - // Conversion from medium to large string - fbstring_core nascent; - // Will recurse to another branch of this function - nascent.reserve(minCapacity); - nascent.ml_.size_ = ml_.size_; - // Also copies terminator. - fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, - nascent.ml_.data_); - nascent.swap(*this); - assert(capacity() >= minCapacity); - } - } else { - assert(category() == Category::isSmall); - if (!disableSSO && minCapacity <= maxSmallSize) { - // small - // Nothing to do, everything stays put - } else if (minCapacity <= maxMediumSize) { - // medium - // Don't forget to allocate one extra Char for the terminating null - auto const allocSizeBytes = - goodMallocSize((1 + minCapacity) * sizeof(Char)); - auto const pData = static_cast(checkedMalloc(allocSizeBytes)); - auto const size = smallSize(); - // Also copies terminator. - fbstring_detail::pod_copy(small_, small_ + size + 1, pData); - ml_.data_ = pData; - ml_.size_ = size; - ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium); - } else { - // large - auto const newRC = RefCounted::create(& minCapacity); - auto const size = smallSize(); - // Also copies terminator. - fbstring_detail::pod_copy(small_, small_ + size + 1, newRC->data_); - ml_.data_ = newRC->data_; - ml_.size_ = size; - ml_.setCapacity(minCapacity, Category::isLarge); - assert(capacity() >= minCapacity); - } + switch (category()) { + case Category::isSmall: + reserveSmall(minCapacity, disableSSO); + break; + case Category::isMedium: + reserveMedium(minCapacity); + break; + case Category::isLarge: + reserveLarge(minCapacity); + break; + default: + fbstring_detail::assume_unreachable(); } assert(capacity() >= minCapacity); } - Char * expand_noinit(const size_t delta, - bool expGrowth = false, - bool disableSSO = FBSTRING_DISABLE_SSO) { - // Strategy is simple: make room, then change size - assert(capacity() >= size()); - size_t sz, newSz; - if (category() == Category::isSmall) { - sz = smallSize(); - newSz = sz + delta; - if (!disableSSO && FBSTRING_LIKELY(newSz <= maxSmallSize)) { - setSmallSize(newSz); - return small_ + sz; - } - reserve(expGrowth ? std::max(newSz, 2 * maxSmallSize) : newSz); - } else { - sz = ml_.size_; - newSz = sz + delta; - if (FBSTRING_UNLIKELY(newSz > capacity())) { - // ensures not shared - reserve(expGrowth ? std::max(newSz, 1 + capacity() * 3 / 2) : newSz); - } - } - assert(capacity() >= newSz); - // Category can't be small - we took care of that above - assert(category() == Category::isMedium || category() == Category::isLarge); - ml_.size_ = newSz; - ml_.data_[newSz] = '\0'; - assert(size() == newSz); - return ml_.data_ + sz; - } + Char* expandNoinit( + const size_t delta, + bool expGrowth = false, + bool disableSSO = FBSTRING_DISABLE_SSO); void push_back(Char c) { - *expand_noinit(1, /* expGrowth = */ true) = c; + *expandNoinit(1, /* expGrowth = */ true) = c; } size_t size() const { @@ -666,7 +489,9 @@ public: // For large-sized strings, a multi-referenced chunk has no // available capacity. This is because any attempt to append // data would trigger a new allocation. - if (RefCounted::refs(ml_.data_) > 1) return ml_.size_; + if (RefCounted::refs(ml_.data_) > 1) { + return ml_.size_; + } default: {} } return ml_.capacity(); @@ -733,7 +558,7 @@ private: static RefCounted * create(const Char * data, size_t * size) { const size_t effectiveSize = *size; auto result = create(size); - fbstring_detail::pod_copy(data, data + effectiveSize, result->data_); + fbstring_detail::podCopy(data, data + effectiveSize, result->data_); return result; } @@ -831,8 +656,312 @@ private: small_[s] = '\0'; assert(category() == Category::isSmall && size() == s); } + + void copySmall(const fbstring_core&); + void copyMedium(const fbstring_core&); + void copyLarge(const fbstring_core&); + + void initSmall(const Char* data, size_t size); + void initMedium(const Char* data, size_t size); + void initLarge(const Char* data, size_t size); + + void reserveSmall(size_t minCapacity, bool disableSSO); + void reserveMedium(size_t minCapacity); + void reserveLarge(size_t minCapacity); + + void shrinkSmall(size_t delta); + void shrinkMedium(size_t delta); + void shrinkLarge(size_t delta); + + Char* mutableDataLarge(); }; +template +inline void fbstring_core::copySmall(const fbstring_core& rhs) { + static_assert(offsetof(MediumLarge, data_) == 0, "fbstring layout failure"); + static_assert( + offsetof(MediumLarge, size_) == sizeof(ml_.data_), + "fbstring layout failure"); + static_assert( + offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_), + "fbstring layout failure"); + // Just write the whole thing, don't look at details. In + // particular we need to copy capacity anyway because we want + // to set the size (don't forget that the last character, + // which stores a short string's length, is shared with the + // ml_.capacity field). + ml_ = rhs.ml_; + assert(category() == Category::isSmall && this->size() == rhs.size()); +} + +template +inline void fbstring_core::copyMedium(const fbstring_core& rhs) { + // Medium strings are copied eagerly. Don't forget to allocate + // one extra Char for the null terminator. + auto const allocSize = goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char)); + ml_.data_ = static_cast(checkedMalloc(allocSize)); + // Also copies terminator. + fbstring_detail::podCopy( + rhs.ml_.data_, rhs.ml_.data_ + rhs.ml_.size_ + 1, ml_.data_); + ml_.size_ = rhs.ml_.size_; + ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); + assert(category() == Category::isMedium); +} + +template +inline void fbstring_core::copyLarge(const fbstring_core& rhs) { + // Large strings are just refcounted + ml_ = rhs.ml_; + RefCounted::incrementRefs(ml_.data_); + assert(category() == Category::isLarge && size() == rhs.size()); +} + +// Small strings are bitblitted +template +inline void fbstring_core::initSmall( + const Char* const data, const size_t size) { + // Layout is: Char* data_, size_t size_, size_t capacity_ + static_assert( + sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t), + "fbstring has unexpected size"); + static_assert( + sizeof(Char*) == sizeof(size_t), "fbstring size assumption violation"); + // sizeof(size_t) must be a power of 2 + static_assert( + (sizeof(size_t) & (sizeof(size_t) - 1)) == 0, + "fbstring size assumption violation"); + +// If data is aligned, use fast word-wise copying. Otherwise, +// use conservative memcpy. +// The word-wise path reads bytes which are outside the range of +// the string, and makes ASan unhappy, so we disable it when +// compiling with ASan. +#ifndef FBSTRING_SANITIZE_ADDRESS + if ((reinterpret_cast(data) & (sizeof(size_t) - 1)) == 0) { + const size_t byteSize = size * sizeof(Char); + constexpr size_t wordWidth = sizeof(size_t); + switch ((byteSize + wordWidth - 1) / wordWidth) { // Number of words. + case 3: + ml_.capacity_ = reinterpret_cast(data)[2]; + case 2: + ml_.size_ = reinterpret_cast(data)[1]; + case 1: + ml_.data_ = *reinterpret_cast(const_cast(data)); + case 0: + break; + } + } else +#endif + { + if (size != 0) { + fbstring_detail::podCopy(data, data + size, small_); + } + } + setSmallSize(size); +} + +template +inline void fbstring_core::initMedium( + const Char* const data, const size_t size) { + // Medium strings are allocated normally. Don't forget to + // allocate one extra Char for the terminating null. + auto const allocSize = goodMallocSize((1 + size) * sizeof(Char)); + ml_.data_ = static_cast(checkedMalloc(allocSize)); + fbstring_detail::podCopy(data, data + size, ml_.data_); + ml_.size_ = size; + ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); + ml_.data_[size] = '\0'; +} + +template +inline void fbstring_core::initLarge( + const Char* const data, const size_t size) { + // Large strings are allocated differently + size_t effectiveCapacity = size; + auto const newRC = RefCounted::create(data, &effectiveCapacity); + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.setCapacity(effectiveCapacity, Category::isLarge); + ml_.data_[size] = '\0'; +} + +template +inline Char* fbstring_core::mutableDataLarge() { + assert(category() == Category::isLarge); + if (RefCounted::refs(ml_.data_) > 1) { + // Ensure unique. + size_t effectiveCapacity = ml_.capacity(); + auto const newRC = RefCounted::create(&effectiveCapacity); + // If this fails, someone placed the wrong capacity in an + // fbstring. + assert(effectiveCapacity >= ml_.capacity()); + // Also copies terminator. + fbstring_detail::podCopy( + ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_); + RefCounted::decrementRefs(ml_.data_); + ml_.data_ = newRC->data_; + } + return ml_.data_; +} + +template +inline void fbstring_core::reserveLarge(size_t minCapacity) { + assert(category() == Category::isLarge); + // Ensure unique + if (RefCounted::refs(ml_.data_) > 1) { + // We must make it unique regardless; in-place reallocation is + // useless if the string is shared. In order to not surprise + // people, reserve the new block at current capacity or + // more. That way, a string's capacity never shrinks after a + // call to reserve. + minCapacity = std::max(minCapacity, ml_.capacity()); + auto const newRC = RefCounted::create(&minCapacity); + // Also copies terminator. + fbstring_detail::podCopy( + ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_); + RefCounted::decrementRefs(ml_.data_); + ml_.data_ = newRC->data_; + ml_.setCapacity(minCapacity, Category::isLarge); + // size remains unchanged + } else { + // String is not shared, so let's try to realloc (if needed) + if (minCapacity > ml_.capacity()) { + // Asking for more memory + auto const newRC = RefCounted::reallocate( + ml_.data_, ml_.size_, ml_.capacity(), minCapacity); + ml_.data_ = newRC->data_; + ml_.setCapacity(minCapacity, Category::isLarge); + } + assert(capacity() >= minCapacity); + } +} + +template +inline void fbstring_core::reserveMedium(const size_t minCapacity) { + assert(category() == Category::isMedium); + // String is not shared + if (minCapacity <= ml_.capacity()) { + return; // nothing to do, there's enough room + } + if (minCapacity <= maxMediumSize) { + // Keep the string at medium size. Don't forget to allocate + // one extra Char for the terminating null. + size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); + // Also copies terminator. + ml_.data_ = static_cast(smartRealloc( + ml_.data_, + (ml_.size_ + 1) * sizeof(Char), + (ml_.capacity() + 1) * sizeof(Char), + capacityBytes)); + ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium); + } else { + // Conversion from medium to large string + fbstring_core nascent; + // Will recurse to another branch of this function + nascent.reserve(minCapacity); + nascent.ml_.size_ = ml_.size_; + // Also copies terminator. + fbstring_detail::podCopy( + ml_.data_, ml_.data_ + ml_.size_ + 1, nascent.ml_.data_); + nascent.swap(*this); + assert(capacity() >= minCapacity); + } +} + +template +inline void fbstring_core::reserveSmall( + size_t minCapacity, const bool disableSSO) { + assert(category() == Category::isSmall); + if (!disableSSO && minCapacity <= maxSmallSize) { + // small + // Nothing to do, everything stays put + } else if (minCapacity <= maxMediumSize) { + // medium + // Don't forget to allocate one extra Char for the terminating null + auto const allocSizeBytes = + goodMallocSize((1 + minCapacity) * sizeof(Char)); + auto const pData = static_cast(checkedMalloc(allocSizeBytes)); + auto const size = smallSize(); + // Also copies terminator. + fbstring_detail::podCopy(small_, small_ + size + 1, pData); + ml_.data_ = pData; + ml_.size_ = size; + ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium); + } else { + // large + auto const newRC = RefCounted::create(&minCapacity); + auto const size = smallSize(); + // Also copies terminator. + fbstring_detail::podCopy(small_, small_ + size + 1, newRC->data_); + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.setCapacity(minCapacity, Category::isLarge); + assert(capacity() >= minCapacity); + } +} + +template +inline Char* fbstring_core::expandNoinit( + const size_t delta, + bool expGrowth, /* = false */ + bool disableSSO /* = FBSTRING_DISABLE_SSO */) { + // Strategy is simple: make room, then change size + assert(capacity() >= size()); + size_t sz, newSz; + if (category() == Category::isSmall) { + sz = smallSize(); + newSz = sz + delta; + if (!disableSSO && FBSTRING_LIKELY(newSz <= maxSmallSize)) { + setSmallSize(newSz); + return small_ + sz; + } + reserveSmall( + expGrowth ? std::max(newSz, 2 * maxSmallSize) : newSz, disableSSO); + } else { + sz = ml_.size_; + newSz = sz + delta; + if (FBSTRING_UNLIKELY(newSz > capacity())) { + // ensures not shared + reserve(expGrowth ? std::max(newSz, 1 + capacity() * 3 / 2) : newSz); + } + } + assert(capacity() >= newSz); + // Category can't be small - we took care of that above + assert(category() == Category::isMedium || category() == Category::isLarge); + ml_.size_ = newSz; + ml_.data_[newSz] = '\0'; + assert(size() == newSz); + return ml_.data_ + sz; +} + +template +inline void fbstring_core::shrinkSmall(const size_t delta) { + // Check for underflow + assert(delta <= smallSize()); + setSmallSize(smallSize() - delta); +} + +template +inline void fbstring_core::shrinkMedium(const size_t delta) { + // Medium strings and unique large strings need no special + // handling. + assert(ml_.size_ >= delta); + ml_.size_ -= delta; + ml_.data_[ml_.size_] = '\0'; +} + +template +inline void fbstring_core::shrinkLarge(const size_t delta) { + assert(ml_.size_ >= delta); + // Shared large string, must make unique. This is because of the + // durn terminator must be written, which may trample the shared + // data. + if (delta) { + fbstring_core(ml_.data_, ml_.size_ - delta).swap(*this); + } + // No need to write the terminator. +} + #ifndef _LIBSTDCXX_FBSTRING /** * Dummy fbstring core that uses an actual std::string. This doesn't @@ -855,15 +984,14 @@ public: const Char * data() const { return backend_.data(); } - Char * mutable_data() { - //assert(!backend_.empty()); - return &*backend_.begin(); + Char* mutableData() { + return const_cast(backend_.data()); } void shrink(size_t delta) { assert(delta <= size()); backend_.resize(size() - delta); } - Char * expand_noinit(size_t delta) { + Char* expandNoinit(size_t delta) { auto const sz = size(); backend_.resize(size() + delta); return backend_.data() + sz; @@ -908,7 +1036,9 @@ class basic_fbstring { bool condition, void (*throw_exc)(const char*), const char* msg) { - if (!condition) throw_exc(msg); + if (!condition) { + throw_exc(msg); + } } bool isSane() const { @@ -925,6 +1055,7 @@ class basic_fbstring { struct Invariant; friend struct Invariant; struct Invariant { + Invariant& operator=(const Invariant&) = delete; #ifndef NDEBUG explicit Invariant(const basic_fbstring& s) : s_(s) { assert(s_.isSane()); @@ -937,7 +1068,6 @@ class basic_fbstring { #else explicit Invariant(const basic_fbstring&) {} #endif - Invariant& operator=(const Invariant&); }; public: @@ -966,14 +1096,18 @@ public: #endif > const_reverse_iterator; - static const size_type npos; // = size_type(-1) + static constexpr size_type npos = size_type(-1); typedef std::true_type IsRelocatable; private: static void procrustes(size_type& n, size_type nmax) { - if (n > nmax) n = nmax; + if (n > nmax) { + n = nmax; + } } + static size_type traitsLength(const value_type* s); + public: // C++11 21.4.2 construct/copy/destroy @@ -1019,11 +1153,7 @@ public: } /* implicit */ basic_fbstring(const value_type* s, const A& /*a*/ = A()) - : store_(s, s - ? traits_type::length(s) - : (std::__throw_logic_error( - "basic_fbstring: null pointer initializer not valid"), - 0)) { + : store_(s, basic_fbstring::traitsLength(s)) { } basic_fbstring(const value_type* s, size_type n, const A& /*a*/ = A()) @@ -1031,20 +1161,22 @@ public: } basic_fbstring(size_type n, value_type c, const A& /*a*/ = A()) { - auto const pData = store_.expand_noinit(n); - fbstring_detail::pod_fill(pData, pData + n, c); + auto const pData = store_.expandNoinit(n); + fbstring_detail::podFill(pData, pData + n, c); } template - basic_fbstring(InIt begin, InIt end, - typename std::enable_if< - !std::is_same::type, - value_type*>::value, const A>::type & /*a*/ = A()) { + basic_fbstring( + InIt begin, + InIt end, + typename std::enable_if< + !std::is_same::value, + const A>::type& /*a*/ = A()) { assign(begin, end); } // Specialization for const char*, const char* - basic_fbstring(const value_type* b, const value_type* e) + basic_fbstring(const value_type* b, const value_type* e, const A& /*a*/ = A()) : store_(b, e - b) { } @@ -1062,29 +1194,10 @@ public: ~basic_fbstring() noexcept { } - basic_fbstring& operator=(const basic_fbstring& lhs) { - Invariant checker(*this); - - if (FBSTRING_UNLIKELY(&lhs == this)) { - return *this; - } - - return assign(lhs.data(), lhs.size()); - } + basic_fbstring& operator=(const basic_fbstring& lhs); // Move assignment - basic_fbstring& operator=(basic_fbstring&& goner) noexcept { - if (FBSTRING_UNLIKELY(&goner == this)) { - // Compatibility with std::basic_string<>, - // C++11 21.4.2 [string.cons] / 23 requires self-move-assignment support. - return *this; - } - // No need of this anymore - this->~basic_fbstring(); - // Move the goner into this - new (&store_) Storage(std::move(goner.store_)); - return *this; - } + basic_fbstring& operator=(basic_fbstring&& goner) noexcept; #ifndef _LIBSTDCXX_FBSTRING // Compatibility with std::string @@ -1107,34 +1220,27 @@ public: return assign(s); } - basic_fbstring& operator=(value_type c) { - Invariant checker(*this); - - if (empty()) { - store_.expand_noinit(1); - } else if (store_.isShared()) { - basic_fbstring(1, c).swap(*this); - return *this; - } else { - store_.shrink(size() - 1); - } - front() = c; - return *this; - } + basic_fbstring& operator=(value_type c); basic_fbstring& operator=(std::initializer_list il) { return assign(il.begin(), il.end()); } // C++11 21.4.3 iterators: - iterator begin() { return store_.mutable_data(); } + iterator begin() { + return store_.mutableData(); + } - const_iterator begin() const { return store_.data(); } + const_iterator begin() const { + return store_.data(); + } - const_iterator cbegin() const { return begin(); } + const_iterator cbegin() const { + return begin(); + } iterator end() { - return store_.mutable_data() + store_.size(); + return store_.mutableData() + store_.size(); } const_iterator end() const { @@ -1191,19 +1297,7 @@ public: return std::numeric_limits::max(); } - void resize(const size_type n, const value_type c = value_type()) { - Invariant checker(*this); - - auto size = this->size(); - if (n <= size) { - store_.shrink(size - n); - } else { - auto const delta = n - size; - auto pData = store_.expand_noinit(delta); - fbstring_detail::pod_fill(pData, pData + delta, c); - } - assert(this->size() == n); - } + void resize(size_type n, value_type c = value_type()); size_type capacity() const { return store_.capacity(); } @@ -1262,64 +1356,18 @@ public: return *this; } - basic_fbstring& append(const basic_fbstring& str) { -#ifndef NDEBUG - auto desiredSize = size() + str.size(); -#endif - append(str.data(), str.size()); - assert(size() == desiredSize); - return *this; - } + basic_fbstring& append(const basic_fbstring& str); - basic_fbstring& append(const basic_fbstring& str, const size_type pos, - size_type n) { - const size_type sz = str.size(); - enforce(pos <= sz, std::__throw_out_of_range, ""); - procrustes(n, sz - pos); - return append(str.data() + pos, n); - } + basic_fbstring& + append(const basic_fbstring& str, const size_type pos, size_type n); - basic_fbstring& append(const value_type* s, size_type n) { - Invariant checker(*this); - - if (FBSTRING_UNLIKELY(!n)) { - // Unlikely but must be done - return *this; - } - auto const oldSize = size(); - auto const oldData = data(); - auto pData = store_.expand_noinit(n, /* expGrowth = */ true); - - // Check for aliasing (rare). We could use "<=" here but in theory - // those do not work for pointers unless the pointers point to - // elements in the same array. For that reason we use - // std::less_equal, which is guaranteed to offer a total order - // over pointers. See discussion at http://goo.gl/Cy2ya for more - // info. - std::less_equal le; - if (FBSTRING_UNLIKELY(le(oldData, s) && !le(oldData + oldSize, s))) { - assert(le(s + n, oldData + oldSize)); - // expand_noinit() could have moved the storage, restore the source. - s = data() + (s - oldData); - fbstring_detail::pod_move(s, s + n, pData); - } else { - fbstring_detail::pod_copy(s, s + n, pData); - } - - assert(size() == oldSize + n); - return *this; - } + basic_fbstring& append(const value_type* s, size_type n); basic_fbstring& append(const value_type* s) { return append(s, traits_type::length(s)); } - basic_fbstring& append(size_type n, value_type c) { - Invariant checker(*this); - auto pData = store_.expand_noinit(n, /* expGrowth = */ true); - fbstring_detail::pod_fill(pData, pData + n, c); - return *this; - } + basic_fbstring& append(size_type n, value_type c); template basic_fbstring& append(InputIterator first, InputIterator last) { @@ -1344,36 +1392,10 @@ public: return *this = std::move(str); } - basic_fbstring& assign(const basic_fbstring& str, const size_type pos, - size_type n) { - const size_type sz = str.size(); - enforce(pos <= sz, std::__throw_out_of_range, ""); - procrustes(n, sz - pos); - return assign(str.data() + pos, n); - } - - basic_fbstring& assign(const value_type* s, const size_type n) { - Invariant checker(*this); - - if (n == 0) { - resize(0); - } else if (size() >= n) { - // s can alias this, we need to use pod_move. - fbstring_detail::pod_move(s, s + n, store_.mutable_data()); - store_.shrink(size() - n); - assert(size() == n); - } else { - // If n is larger than size(), s cannot alias this string's - // storage. - resize(0); - // Do not use exponential growth here: assign() should be tight, - // to mirror the behavior of the equivalent constructor. - fbstring_detail::pod_copy(s, s + n, store_.expand_noinit(n)); - } + basic_fbstring& + assign(const basic_fbstring& str, const size_type pos, size_type n); - assert(size() == n); - return *this; - } + basic_fbstring& assign(const value_type* s, const size_type n); basic_fbstring& assign(const value_type* s) { return assign(s, traits_type::length(s)); @@ -1424,39 +1446,13 @@ public: #ifndef _LIBSTDCXX_FBSTRING private: typedef std::basic_istream istream_type; + istream_type& getlineImpl(istream_type& is, value_type delim); public: friend inline istream_type& getline(istream_type& is, basic_fbstring& str, value_type delim) { - Invariant checker(str); - - str.clear(); - size_t size = 0; - while (true) { - size_t avail = str.capacity() - size; - // fbstring has 1 byte extra capacity for the null terminator, - // and getline null-terminates the read string. - is.getline(str.store_.expand_noinit(avail), avail + 1, delim); - size += is.gcount(); - - if (is.bad() || is.eof() || !is.fail()) { - // Done by either failure, end of file, or normal read. - if (!is.bad() && !is.eof()) { - --size; // gcount() also accounts for the delimiter. - } - str.resize(size); - break; - } - - assert(size == str.size()); - assert(size == str.capacity()); - // Start at minimum allocation 63 + terminator = 64. - str.reserve(std::max(63, 3 * size / 2)); - // Clear the error so we can continue reading. - is.clear(); - } - return is; + return str.getlineImpl(is, delim); } friend inline istream_type& getline(istream_type& is, basic_fbstring& str) { @@ -1465,71 +1461,34 @@ public: #endif private: - template class Selector {}; + iterator + insertImplDiscr(const_iterator i, size_type n, value_type c, std::true_type); - iterator insertImplDiscr(const_iterator i, - size_type n, value_type c, Selector<1>) { - Invariant checker(*this); + template + iterator + insertImplDiscr(const_iterator i, InputIter b, InputIter e, std::false_type); - assert(i >= cbegin() && i <= cend()); - const size_type pos = i - cbegin(); - - auto oldSize = size(); - store_.expand_noinit(n, /* expGrowth = */ true); - auto b = begin(); - fbstring_detail::pod_move(b + pos, b + oldSize, b + pos + n); - fbstring_detail::pod_fill(b + pos, b + pos + n, c); - - return b + pos; - } - - template - iterator insertImplDiscr(const_iterator i, - InputIter b, InputIter e, Selector<0>) { - return insertImpl(i, b, e, - typename std::iterator_traits::iterator_category()); - } - - template - iterator insertImpl(const_iterator i, - FwdIterator s1, - FwdIterator s2, - std::forward_iterator_tag) { - Invariant checker(*this); - - assert(i >= cbegin() && i <= cend()); - const size_type pos = i - cbegin(); - auto n = std::distance(s1, s2); - assert(n >= 0); - - auto oldSize = size(); - store_.expand_noinit(n, /* expGrowth = */ true); - auto b = begin(); - fbstring_detail::pod_move(b + pos, b + oldSize, b + pos + n); - std::copy(s1, s2, b + pos); - - return b + pos; - } - - template - iterator insertImpl(const_iterator i, - InputIterator b, InputIterator e, - std::input_iterator_tag) { - const auto pos = i - cbegin(); - basic_fbstring temp(cbegin(), i); - for (; b != e; ++b) { - temp.push_back(*b); - } - temp.append(i, cend()); - swap(temp); - return begin() + pos; - } + template + iterator insertImpl( + const_iterator i, + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag); + + template + iterator insertImpl( + const_iterator i, + InputIterator b, + InputIterator e, + std::input_iterator_tag); public: template iterator insert(const_iterator p, ItOrLength first_or_n, ItOrChar last_or_c) { - Selector::is_specialized> sel; - return insertImplDiscr(p, first_or_n, last_or_c, sel); + using Sel = std::integral_constant< + bool, + std::numeric_limits::is_specialized>; + return insertImplDiscr(p, first_or_n, last_or_c, Sel()); } iterator insert(const_iterator p, std::initializer_list il) { @@ -1608,37 +1567,27 @@ public: } private: - basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, - const value_type* s, size_type n, - Selector<2>) { - assert(i1 <= i2); - assert(begin() <= i1 && i1 <= end()); - assert(begin() <= i2 && i2 <= end()); - return replace(i1, i2, s, s + n); - } - - basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, - size_type n2, value_type c, Selector<1>) { - const size_type n1 = i2 - i1; - if (n1 > n2) { - std::fill(i1, i1 + n2, c); - erase(i1 + n2, i2); - } else { - std::fill(i1, i2, c); - insert(i2, n2 - n1, c); - } - assert(isSane()); - return *this; - } - - template - basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, - InputIter b, InputIter e, - Selector<0>) { - replaceImpl(i1, i2, b, e, - typename std::iterator_traits::iterator_category()); - return *this; - } + basic_fbstring& replaceImplDiscr( + iterator i1, + iterator i2, + const value_type* s, + size_type n, + std::integral_constant); + + basic_fbstring& replaceImplDiscr( + iterator i1, + iterator i2, + size_type n2, + value_type c, + std::integral_constant); + + template + basic_fbstring& replaceImplDiscr( + iterator i1, + iterator i2, + InputIter b, + InputIter e, + std::integral_constant); private: template @@ -1651,71 +1600,38 @@ private: } template - bool replaceAliased(iterator i1, iterator i2, - FwdIterator s1, FwdIterator s2, std::true_type) { - static const std::less_equal le = - std::less_equal(); - const bool aliased = le(&*begin(), &*s1) && le(&*s1, &*end()); - if (!aliased) { - return false; - } - // Aliased replace, copy to new string - basic_fbstring temp; - temp.reserve(size() - (i2 - i1) + std::distance(s1, s2)); - temp.append(begin(), i1).append(s1, s2).append(i2, end()); - swap(temp); - return true; - } + bool replaceAliased( + iterator i1, + iterator i2, + FwdIterator s1, + FwdIterator s2, + std::true_type); template - void replaceImpl(iterator i1, iterator i2, - FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { - Invariant checker(*this); - - // Handle aliased replace - if (replaceAliased(i1, i2, s1, s2, - std::integral_constant::value || - std::is_same::value>())) { - return; - } - - auto const n1 = i2 - i1; - assert(n1 >= 0); - auto const n2 = std::distance(s1, s2); - assert(n2 >= 0); - - if (n1 > n2) { - // shrinks - std::copy(s1, s2, i1); - erase(i1 + n2, i2); - } else { - // grows - fbstring_detail::copy_n(s1, n1, i1); - std::advance(s1, n1); - insert(i2, s1, s2); - } - assert(isSane()); - } + void replaceImpl( + iterator i1, + iterator i2, + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag); template - void replaceImpl(iterator i1, iterator i2, - InputIterator b, InputIterator e, std::input_iterator_tag) { - basic_fbstring temp(begin(), i1); - temp.append(b, e).append(i2, end()); - swap(temp); - } + void replaceImpl( + iterator i1, + iterator i2, + InputIterator b, + InputIterator e, + std::input_iterator_tag); -public: + public: template basic_fbstring& replace(iterator i1, iterator i2, T1 first_or_n_or_s, T2 last_or_c_or_n) { - const bool - num1 = std::numeric_limits::is_specialized, - num2 = std::numeric_limits::is_specialized; - return replaceImplDiscr( - i1, i2, first_or_n_or_s, last_or_c_or_n, - Selector()); + constexpr bool num1 = std::numeric_limits::is_specialized, + num2 = std::numeric_limits::is_specialized; + using Sel = + std::integral_constant; + return replaceImplDiscr(i1, i2, first_or_n_or_s, last_or_c_or_n, Sel()); } size_type copy(value_type* s, size_type n, size_type pos = 0) const { @@ -1723,7 +1639,7 @@ public: procrustes(n, size() - pos); if (n != 0) { - fbstring_detail::pod_copy(data() + pos, data() + pos + n, s); + fbstring_detail::podCopy(data() + pos, data() + pos + n, s); } return n; } @@ -1746,61 +1662,8 @@ public: return find(str.data(), pos, str.length()); } - size_type find(const value_type* needle, const size_type pos, - const size_type nsize) const { - auto const size = this->size(); - // nsize + pos can overflow (eg pos == npos), guard against that by checking - // that nsize + pos does not wrap around. - if (nsize + pos > size || nsize + pos < pos) return npos; - - if (nsize == 0) return pos; - // Don't use std::search, use a Boyer-Moore-like trick by comparing - // the last characters first - auto const haystack = data(); - auto const nsize_1 = nsize - 1; - auto const lastNeedle = needle[nsize_1]; - - // Boyer-Moore skip value for the last char in the needle. Zero is - // not a valid value; skip will be computed the first time it's - // needed. - size_type skip = 0; - - const E * i = haystack + pos; - auto iEnd = haystack + size - nsize_1; - - while (i < iEnd) { - // Boyer-Moore: match the last element in the needle - while (i[nsize_1] != lastNeedle) { - if (++i == iEnd) { - // not found - return npos; - } - } - // Here we know that the last char matches - // Continue in pedestrian mode - for (size_t j = 0; ; ) { - assert(j < nsize); - if (i[j] != needle[j]) { - // Not found, we can skip - // Compute the skip value lazily - if (skip == 0) { - skip = 1; - while (skip <= nsize_1 && needle[nsize_1 - skip] != lastNeedle) { - ++skip; - } - } - i += skip; - break; - } - // Check if done searching - if (++j == nsize) { - // Yay - return i - haystack; - } - } - } - return npos; - } + size_type find(const value_type* needle, size_type pos, size_type nsize) + const; size_type find(const value_type* s, size_type pos = 0) const { return find(s, pos, traits_type::length(s)); @@ -1814,21 +1677,7 @@ public: return rfind(str.data(), pos, str.length()); } - size_type rfind(const value_type* s, size_type pos, size_type n) const { - if (n > length()) return npos; - pos = std::min(pos, length() - n); - if (n == 0) return pos; - - const_iterator i(begin() + pos); - for (; ; --i) { - if (traits_type::eq(*i, *s) - && traits_type::compare(&*i, s, n) == 0) { - return i - begin(); - } - if (i == begin()) break; - } - return npos; - } + size_type rfind(const value_type* s, size_type pos, size_type n) const; size_type rfind(const value_type* s, size_type pos = npos) const { return rfind(s, pos, traits_type::length(s)); @@ -1842,18 +1691,8 @@ public: return find_first_of(str.data(), pos, str.length()); } - size_type find_first_of(const value_type* s, - size_type pos, size_type n) const { - if (pos > length() || n == 0) return npos; - const_iterator i(begin() + pos), - finish(end()); - for (; i != finish; ++i) { - if (traits_type::find(s, n, *i) != 0) { - return i - begin(); - } - } - return npos; - } + size_type find_first_of(const value_type* s, size_type pos, size_type n) + const; size_type find_first_of(const value_type* s, size_type pos = 0) const { return find_first_of(s, pos, traits_type::length(s)); @@ -1863,25 +1702,12 @@ public: return find_first_of(&c, pos, 1); } - size_type find_last_of (const basic_fbstring& str, - size_type pos = npos) const { + size_type find_last_of(const basic_fbstring& str, size_type pos = npos) + const { return find_last_of(str.data(), pos, str.length()); } - size_type find_last_of (const value_type* s, size_type pos, - size_type n) const { - if (!empty() && n > 0) { - pos = std::min(pos, length() - 1); - const_iterator i(begin() + pos); - for (;; --i) { - if (traits_type::find(s, n, *i) != 0) { - return i - begin(); - } - if (i == begin()) break; - } - } - return npos; - } + size_type find_last_of(const value_type* s, size_type pos, size_type n) const; size_type find_last_of (const value_type* s, size_type pos = npos) const { @@ -1897,20 +1723,8 @@ public: return find_first_not_of(str.data(), pos, str.size()); } - size_type find_first_not_of(const value_type* s, size_type pos, - size_type n) const { - if (pos < length()) { - const_iterator - i(begin() + pos), - finish(end()); - for (; i != finish; ++i) { - if (traits_type::find(s, n, *i) == 0) { - return i - begin(); - } - } - } - return npos; - } + size_type find_first_not_of(const value_type* s, size_type pos, size_type n) + const; size_type find_first_not_of(const value_type* s, size_type pos = 0) const { @@ -1926,20 +1740,8 @@ public: return find_last_not_of(str.data(), pos, str.length()); } - size_type find_last_not_of(const value_type* s, size_type pos, - size_type n) const { - if (!this->empty()) { - pos = std::min(pos, size() - 1); - const_iterator i(begin() + pos); - for (;; --i) { - if (traits_type::find(s, n, *i) == 0) { - return i - begin(); - } - if (i == begin()) break; - } - } - return npos; - } + size_type find_last_not_of(const value_type* s, size_type pos, size_type n) + const; size_type find_last_not_of(const value_type* s, size_type pos = npos) const { @@ -1958,7 +1760,9 @@ public: basic_fbstring substr(size_type pos = 0, size_type n = npos) && { enforce(pos <= size(), std::__throw_out_of_range, ""); erase(0, pos); - if (n < size()) resize(n); + if (n < size()) { + resize(n); + } return std::move(*this); } @@ -2008,6 +1812,544 @@ private: Storage store_; }; +template +inline typename basic_fbstring::size_type +basic_fbstring::traitsLength(const value_type* s) { + return s ? traits_type::length(s) + : (std::__throw_logic_error( + "basic_fbstring: null pointer initializer not valid"), + 0); +} + +template +inline basic_fbstring& basic_fbstring::operator=( + const basic_fbstring& lhs) { + Invariant checker(*this); + + if (FBSTRING_UNLIKELY(&lhs == this)) { + return *this; + } + + return assign(lhs.data(), lhs.size()); +} + +// Move assignment +template +inline basic_fbstring& basic_fbstring::operator=( + basic_fbstring&& goner) noexcept { + if (FBSTRING_UNLIKELY(&goner == this)) { + // Compatibility with std::basic_string<>, + // C++11 21.4.2 [string.cons] / 23 requires self-move-assignment support. + return *this; + } + // No need of this anymore + this->~basic_fbstring(); + // Move the goner into this + new (&store_) S(std::move(goner.store_)); + return *this; +} + +template +inline basic_fbstring& basic_fbstring::operator=( + const value_type c) { + Invariant checker(*this); + + if (empty()) { + store_.expandNoinit(1); + } else if (store_.isShared()) { + basic_fbstring(1, c).swap(*this); + return *this; + } else { + store_.shrink(size() - 1); + } + front() = c; + return *this; +} + +template +inline void basic_fbstring::resize( + const size_type n, const value_type c /*= value_type()*/) { + Invariant checker(*this); + + auto size = this->size(); + if (n <= size) { + store_.shrink(size - n); + } else { + auto const delta = n - size; + auto pData = store_.expandNoinit(delta); + fbstring_detail::podFill(pData, pData + delta, c); + } + assert(this->size() == n); +} + +template +inline basic_fbstring& basic_fbstring::append( + const basic_fbstring& str) { +#ifndef NDEBUG + auto desiredSize = size() + str.size(); +#endif + append(str.data(), str.size()); + assert(size() == desiredSize); + return *this; +} + +template +inline basic_fbstring& basic_fbstring::append( + const basic_fbstring& str, const size_type pos, size_type n) { + const size_type sz = str.size(); + enforce(pos <= sz, std::__throw_out_of_range, ""); + procrustes(n, sz - pos); + return append(str.data() + pos, n); +} + +template +inline basic_fbstring& basic_fbstring::append( + const value_type* s, size_type n) { + Invariant checker(*this); + + if (FBSTRING_UNLIKELY(!n)) { + // Unlikely but must be done + return *this; + } + auto const oldSize = size(); + auto const oldData = data(); + auto pData = store_.expandNoinit(n, /* expGrowth = */ true); + + // Check for aliasing (rare). We could use "<=" here but in theory + // those do not work for pointers unless the pointers point to + // elements in the same array. For that reason we use + // std::less_equal, which is guaranteed to offer a total order + // over pointers. See discussion at http://goo.gl/Cy2ya for more + // info. + std::less_equal le; + if (FBSTRING_UNLIKELY(le(oldData, s) && !le(oldData + oldSize, s))) { + assert(le(s + n, oldData + oldSize)); + // expandNoinit() could have moved the storage, restore the source. + s = data() + (s - oldData); + fbstring_detail::podMove(s, s + n, pData); + } else { + fbstring_detail::podCopy(s, s + n, pData); + } + + assert(size() == oldSize + n); + return *this; +} + +template +inline basic_fbstring& basic_fbstring::append( + size_type n, value_type c) { + Invariant checker(*this); + auto pData = store_.expandNoinit(n, /* expGrowth = */ true); + fbstring_detail::podFill(pData, pData + n, c); + return *this; +} + +template +inline basic_fbstring& basic_fbstring::assign( + const basic_fbstring& str, const size_type pos, size_type n) { + const size_type sz = str.size(); + enforce(pos <= sz, std::__throw_out_of_range, ""); + procrustes(n, sz - pos); + return assign(str.data() + pos, n); +} + +template +inline basic_fbstring& basic_fbstring::assign( + const value_type* s, const size_type n) { + Invariant checker(*this); + + // s can alias this, we need to use podMove. + if (n == 0) { + resize(0); + } else if (size() >= n) { + // s can alias this, we need to use podMove. + fbstring_detail::podMove(s, s + n, store_.mutableData()); + store_.shrink(size() - n); + assert(size() == n); + } else { + // If n is larger than size(), s cannot alias this string's + // storage. + resize(0); + // Do not use exponential growth here: assign() should be tight, + // to mirror the behavior of the equivalent constructor. + fbstring_detail::podCopy(s, s + n, store_.expandNoinit(n)); + } + + assert(size() == n); + return *this; +} + +#ifndef _LIBSTDCXX_FBSTRING +template +inline typename basic_fbstring::istream_type& +basic_fbstring::getlineImpl(istream_type & is, value_type delim) { + Invariant checker(*this); + + clear(); + size_t size = 0; + while (true) { + size_t avail = capacity() - size; + // fbstring has 1 byte extra capacity for the null terminator, + // and getline null-terminates the read string. + is.getline(store_.expandNoinit(avail), avail + 1, delim); + size += is.gcount(); + + if (is.bad() || is.eof() || !is.fail()) { + // Done by either failure, end of file, or normal read. + if (!is.bad() && !is.eof()) { + --size; // gcount() also accounts for the delimiter. + } + resize(size); + break; + } + + assert(size == this->size()); + assert(size == capacity()); + // Start at minimum allocation 63 + terminator = 64. + reserve(std::max(63, 3 * size / 2)); + // Clear the error so we can continue reading. + is.clear(); + } + return is; +} +#endif + +template +inline typename basic_fbstring::size_type +basic_fbstring::find( + const value_type* needle, const size_type pos, const size_type nsize) + const { + auto const size = this->size(); + // nsize + pos can overflow (eg pos == npos), guard against that by checking + // that nsize + pos does not wrap around. + if (nsize + pos > size || nsize + pos < pos) { + return npos; + } + + if (nsize == 0) { + return pos; + } + // Don't use std::search, use a Boyer-Moore-like trick by comparing + // the last characters first + auto const haystack = data(); + auto const nsize_1 = nsize - 1; + auto const lastNeedle = needle[nsize_1]; + + // Boyer-Moore skip value for the last char in the needle. Zero is + // not a valid value; skip will be computed the first time it's + // needed. + size_type skip = 0; + + const E* i = haystack + pos; + auto iEnd = haystack + size - nsize_1; + + while (i < iEnd) { + // Boyer-Moore: match the last element in the needle + while (i[nsize_1] != lastNeedle) { + if (++i == iEnd) { + // not found + return npos; + } + } + // Here we know that the last char matches + // Continue in pedestrian mode + for (size_t j = 0;;) { + assert(j < nsize); + if (i[j] != needle[j]) { + // Not found, we can skip + // Compute the skip value lazily + if (skip == 0) { + skip = 1; + while (skip <= nsize_1 && needle[nsize_1 - skip] != lastNeedle) { + ++skip; + } + } + i += skip; + break; + } + // Check if done searching + if (++j == nsize) { + // Yay + return i - haystack; + } + } + } + return npos; +} + +template +inline typename basic_fbstring::iterator +basic_fbstring::insertImplDiscr( + const_iterator i, size_type n, value_type c, std::true_type) { + Invariant checker(*this); + + assert(i >= cbegin() && i <= cend()); + const size_type pos = i - cbegin(); + + auto oldSize = size(); + store_.expandNoinit(n, /* expGrowth = */ true); + auto b = begin(); + fbstring_detail::podMove(b + pos, b + oldSize, b + pos + n); + fbstring_detail::podFill(b + pos, b + pos + n, c); + + return b + pos; +} + +template +template +inline typename basic_fbstring::iterator +basic_fbstring::insertImplDiscr( + const_iterator i, InputIter b, InputIter e, std::false_type) { + return insertImpl( + i, b, e, typename std::iterator_traits::iterator_category()); +} + +template +template +inline typename basic_fbstring::iterator +basic_fbstring::insertImpl( + const_iterator i, + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag) { + Invariant checker(*this); + + assert(i >= cbegin() && i <= cend()); + const size_type pos = i - cbegin(); + auto n = std::distance(s1, s2); + assert(n >= 0); + + auto oldSize = size(); + store_.expandNoinit(n, /* expGrowth = */ true); + auto b = begin(); + fbstring_detail::podMove(b + pos, b + oldSize, b + pos + n); + std::copy(s1, s2, b + pos); + + return b + pos; +} + +template +template +inline typename basic_fbstring::iterator +basic_fbstring::insertImpl( + const_iterator i, + InputIterator b, + InputIterator e, + std::input_iterator_tag) { + const auto pos = i - cbegin(); + basic_fbstring temp(cbegin(), i); + for (; b != e; ++b) { + temp.push_back(*b); + } + temp.append(i, cend()); + swap(temp); + return begin() + pos; +} + +template +inline basic_fbstring& basic_fbstring::replaceImplDiscr( + iterator i1, + iterator i2, + const value_type* s, + size_type n, + std::integral_constant) { + assert(i1 <= i2); + assert(begin() <= i1 && i1 <= end()); + assert(begin() <= i2 && i2 <= end()); + return replace(i1, i2, s, s + n); +} + +template +inline basic_fbstring& basic_fbstring::replaceImplDiscr( + iterator i1, + iterator i2, + size_type n2, + value_type c, + std::integral_constant) { + const size_type n1 = i2 - i1; + if (n1 > n2) { + std::fill(i1, i1 + n2, c); + erase(i1 + n2, i2); + } else { + std::fill(i1, i2, c); + insert(i2, n2 - n1, c); + } + assert(isSane()); + return *this; +} + +template +template +inline basic_fbstring& basic_fbstring::replaceImplDiscr( + iterator i1, + iterator i2, + InputIter b, + InputIter e, + std::integral_constant) { + using Cat = typename std::iterator_traits::iterator_category; + replaceImpl(i1, i2, b, e, Cat()); + return *this; +} + +template +template +inline bool basic_fbstring::replaceAliased( + iterator i1, iterator i2, FwdIterator s1, FwdIterator s2, std::true_type) { + std::less_equal le{}; + const bool aliased = le(&*begin(), &*s1) && le(&*s1, &*end()); + if (!aliased) { + return false; + } + // Aliased replace, copy to new string + basic_fbstring temp; + temp.reserve(size() - (i2 - i1) + std::distance(s1, s2)); + temp.append(begin(), i1).append(s1, s2).append(i2, end()); + swap(temp); + return true; +} + +template +template +inline void basic_fbstring::replaceImpl( + iterator i1, + iterator i2, + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag) { + Invariant checker(*this); + + // Handle aliased replace + using Sel = std::integral_constant< + bool, + std::is_same::value || + std::is_same::value>; + if (replaceAliased(i1, i2, s1, s2, Sel())) { + return; + } + + auto const n1 = i2 - i1; + assert(n1 >= 0); + auto const n2 = std::distance(s1, s2); + assert(n2 >= 0); + + if (n1 > n2) { + // shrinks + std::copy(s1, s2, i1); + erase(i1 + n2, i2); + } else { + // grows + s1 = fbstring_detail::copy_n(s1, n1, i1).first; + insert(i2, s1, s2); + } + assert(isSane()); +} + +template +template +inline void basic_fbstring::replaceImpl( + iterator i1, + iterator i2, + InputIterator b, + InputIterator e, + std::input_iterator_tag) { + basic_fbstring temp(begin(), i1); + temp.append(b, e).append(i2, end()); + swap(temp); +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::rfind( + const value_type* s, size_type pos, size_type n) const { + if (n > length()) { + return npos; + } + pos = std::min(pos, length() - n); + if (n == 0) { + return pos; + } + + const_iterator i(begin() + pos); + for (;; --i) { + if (traits_type::eq(*i, *s) && traits_type::compare(&*i, s, n) == 0) { + return i - begin(); + } + if (i == begin()) { + break; + } + } + return npos; +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::find_first_of( + const value_type* s, size_type pos, size_type n) const { + if (pos > length() || n == 0) { + return npos; + } + const_iterator i(begin() + pos), finish(end()); + for (; i != finish; ++i) { + if (traits_type::find(s, n, *i) != 0) { + return i - begin(); + } + } + return npos; +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::find_last_of( + const value_type* s, size_type pos, size_type n) const { + if (!empty() && n > 0) { + pos = std::min(pos, length() - 1); + const_iterator i(begin() + pos); + for (;; --i) { + if (traits_type::find(s, n, *i) != 0) { + return i - begin(); + } + if (i == begin()) { + break; + } + } + } + return npos; +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::find_first_not_of( + const value_type* s, size_type pos, size_type n) const { + if (pos < length()) { + const_iterator i(begin() + pos), finish(end()); + for (; i != finish; ++i) { + if (traits_type::find(s, n, *i) == 0) { + return i - begin(); + } + } + } + return npos; +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::find_last_not_of( + const value_type* s, size_type pos, size_type n) const { + if (!this->empty()) { + pos = std::min(pos, size() - 1); + const_iterator i(begin() + pos); + for (;; --i) { + if (traits_type::find(s, n, *i) == 0) { + return i - begin(); + } + if (i == begin()) { + break; + } + } + } + return npos; +} + // non-member functions // C++11 21.4.8.1/1 template @@ -2311,7 +2653,9 @@ std::basic_istream< is.width(0); break; } - if (isspace(got)) break; + if (isspace(got)) { + break; + } str.push_back(got); got = is.rdbuf()->snextc(); } @@ -2363,9 +2707,8 @@ operator<<( } template -const typename basic_fbstring::size_type -basic_fbstring::npos = - static_cast::size_type>(-1); +constexpr typename basic_fbstring::size_type + basic_fbstring::npos; #ifndef _LIBSTDCXX_FBSTRING // basic_string compatibility routines @@ -2421,7 +2764,7 @@ _GLIBCXX_END_NAMESPACE_VERSION #define FOLLY_FBSTRING_HASH1(T) \ template <> \ - struct hash< ::folly::basic_fbstring> { \ + struct hash< ::folly::basic_fbstring> { \ size_t operator()(const ::folly::basic_fbstring& s) const { \ return ::folly::hash::fnv32_buf(s.data(), s.size() * sizeof(T)); \ } \ -- 2.34.1