X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FFBString.h;h=4882aac577650dc0689c91baa9269e5bf4438390;hp=147235341ff843ecbd5c5662c1e32eda3a1c9f4b;hb=b67b22f4e27773e2e2c155a3629ff8d468bb1286;hpb=9404518285f3d0a1c2780b45067448a35b4ca903 diff --git a/folly/FBString.h b/folly/FBString.h index 14723534..4882aac5 100644 --- a/folly/FBString.h +++ b/folly/FBString.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,58 +17,50 @@ // @author: Andrei Alexandrescu (aalexandre) // String type. -#ifndef FOLLY_BASE_FBSTRING_H_ -#define FOLLY_BASE_FBSTRING_H_ +#pragma once #include +#include +#include #include #include -#include - -#include "folly/Portability.h" -// libc++ doesn't provide this header, nor does msvc -#ifdef FOLLY_HAVE_BITS_CXXCONFIG_H // This file appears in two locations: inside fbcode and in the // libstdc++ source code (when embedding fbstring as std::string). -// To aid in this schizophrenic use, two macros are defined in -// c++config.h: -// _LIBSTDCXX_FBSTRING - Set inside libstdc++. This is useful to -// gate use inside fbcode v. libstdc++ -#include -#endif - +// To aid in this schizophrenic use, _LIBSTDCXX_FBSTRING is defined in +// libstdc++'s c++config.h, to gate use inside fbcode v. libstdc++. #ifdef _LIBSTDCXX_FBSTRING #pragma GCC system_header -// Handle the cases where the fbcode version (folly/Malloc.h) is included -// either before or after this inclusion. -#ifdef FOLLY_MALLOC_H_ -#undef FOLLY_MALLOC_H_ -#include "basic_fbstring_malloc.h" -#else -#include "basic_fbstring_malloc.h" -#undef FOLLY_MALLOC_H_ -#endif +#include "basic_fbstring_malloc.h" // @manual + +// When used as std::string replacement always disable assertions. +#define FBSTRING_ASSERT(expr) /* empty */ #else // !_LIBSTDCXX_FBSTRING -#include -#include +#include +#include + +// libc++ doesn't provide this header, nor does msvc +#ifdef FOLLY_HAVE_BITS_CXXCONFIG_H +#include +#endif + +#include #include +#include +#include +#include -#include "folly/Traits.h" -#include "folly/Malloc.h" -#include "folly/Hash.h" -#include "folly/ScopeGuard.h" +#include +#include +#include +#include -#if FOLLY_HAVE_DEPRECATED_ASSOC -#ifdef _GLIBCXX_SYMVER -#include -#include -#endif -#endif +// When used in folly, assertions are not disabled. +#define FBSTRING_ASSERT(expr) assert(expr) #endif @@ -82,63 +74,74 @@ #define FBSTRING_UNLIKELY(x) (x) #endif +FOLLY_PUSH_WARNING // Ignore shadowing warnings within this file, so includers can use -Wshadow. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" +FOLLY_GCC_DISABLE_WARNING("-Wshadow") +// GCC 4.9 has a false positive in setSmallSize (probably +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124), disable +// compile-time array bound checking. +FOLLY_GCC_DISABLE_WARNING("-Warray-bounds") // FBString cannot use throw when replacing std::string, though it may still // use std::__throw_* +// nolint #define throw FOLLY_FBSTRING_MAY_NOT_USE_THROW #ifdef _LIBSTDCXX_FBSTRING -namespace std _GLIBCXX_VISIBILITY(default) { -_GLIBCXX_BEGIN_NAMESPACE_VERSION +#define FOLLY_FBSTRING_BEGIN_NAMESPACE \ + namespace std _GLIBCXX_VISIBILITY(default) { \ + _GLIBCXX_BEGIN_NAMESPACE_VERSION +#define FOLLY_FBSTRING_END_NAMESPACE \ + _GLIBCXX_END_NAMESPACE_VERSION \ + } // namespace std #else -namespace folly { +#define FOLLY_FBSTRING_BEGIN_NAMESPACE namespace folly { +#define FOLLY_FBSTRING_END_NAMESPACE } // namespace folly #endif -// Different versions of gcc/clang support different versions of -// the address sanitizer attribute. Unfortunately, this attribute -// has issues when inlining is used, so disable that as well. +FOLLY_FBSTRING_BEGIN_NAMESPACE + #if defined(__clang__) # if __has_feature(address_sanitizer) -# if __has_attribute(__no_address_safety_analysis__) -# define FBSTRING_DISABLE_ADDRESS_SANITIZER \ - __attribute__((__no_address_safety_analysis__, __noinline__)) -# elif __has_attribute(__no_sanitize_address__) -# define FBSTRING_DISABLE_ADDRESS_SANITIZER \ - __attribute__((__no_sanitize_address__, __noinline__)) -# endif +# define FBSTRING_SANITIZE_ADDRESS # endif #elif defined (__GNUC__) && \ - (__GNUC__ == 4) && \ - (__GNUC_MINOR__ >= 8) && \ + (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)) || (__GNUC__ >= 5)) && \ __SANITIZE_ADDRESS__ -# define FBSTRING_DISABLE_ADDRESS_SANITIZER \ - __attribute__((__no_address_safety_analysis__, __noinline__)) +# define FBSTRING_SANITIZE_ADDRESS #endif -#ifndef FBSTRING_DISABLE_ADDRESS_SANITIZER -# define FBSTRING_DISABLE_ADDRESS_SANITIZER + +// When compiling with ASan, always heap-allocate the string even if +// it would fit in-situ, so that ASan can detect access to the string +// buffer after it has been invalidated (destroyed, resized, etc.). +// Note that this flag doesn't remove support for in-situ strings, as +// that would break ABI-compatibility and wouldn't allow linking code +// compiled with this flag with code compiled without. +#ifdef FBSTRING_SANITIZE_ADDRESS +# define FBSTRING_DISABLE_SSO true +#else +# define FBSTRING_DISABLE_SSO false #endif namespace fbstring_detail { template -inline -OutIt copy_n(InIt b, - typename std::iterator_traits::difference_type n, - OutIt d) { +inline std::pair copy_n( + InIt b, + typename std::iterator_traits::difference_type n, + OutIt d) { for (; n != 0; --n, ++b, ++d) { *d = *b; } - return d; + return std::make_pair(b, d); } template -inline void pod_fill(Pod* b, Pod* e, T c) { - assert(b && e && b <= e); - /*static*/ if (sizeof(T) == 1) { - memset(b, c, e - b); +inline void podFill(Pod* b, Pod* e, T c) { + FBSTRING_ASSERT(b && e && b <= e); + constexpr auto kUseMemset = sizeof(T) == 1; + /* static */ if (kUseMemset) { + memset(b, c, size_t(e - b)); } else { auto const ee = b + ((e - b) & ~7u); for (; b != ee; b += 8) { @@ -167,9 +170,12 @@ inline void pod_fill(Pod* b, Pod* e, T c) { * adaptation outside). */ template -inline void pod_copy(const Pod* b, const Pod* e, Pod* d) { - assert(e >= b); - assert(d >= e || d + (e - b) <= b); +inline void podCopy(const Pod* b, const Pod* e, Pod* d) { + FBSTRING_ASSERT(b != nullptr); + FBSTRING_ASSERT(e != nullptr); + FBSTRING_ASSERT(d != nullptr); + FBSTRING_ASSERT(e >= b); + FBSTRING_ASSERT(d >= e || d + (e - b) <= b); memcpy(d, b, (e - b) * sizeof(Pod)); } @@ -178,11 +184,31 @@ inline void pod_copy(const Pod* b, const Pod* e, Pod* d) { * some asserts */ template -inline void pod_move(const Pod* b, const Pod* e, Pod* d) { - assert(e >= b); +inline void podMove(const Pod* b, const Pod* e, Pod* d) { + FBSTRING_ASSERT(e >= b); memmove(d, b, (e - b) * sizeof(*b)); } +// always inline +#if defined(__GNUC__) // Clang also defines __GNUC__ +# define FBSTRING_ALWAYS_INLINE inline __attribute__((__always_inline__)) +#elif defined(_MSC_VER) +# define FBSTRING_ALWAYS_INLINE __forceinline +#else +# define FBSTRING_ALWAYS_INLINE inline +#endif + +[[noreturn]] FBSTRING_ALWAYS_INLINE void assume_unreachable() { +#if defined(__GNUC__) // Clang also defines __GNUC__ + __builtin_unreachable(); +#elif defined(_MSC_VER) + __assume(0); +#else + // Well, it's better than nothing. + std::abort(); +#endif +} + } // namespace fbstring_detail /** @@ -205,7 +231,7 @@ enum class AcquireMallocatedString {}; template class fbstring_core_model { -public: + public: fbstring_core_model(); fbstring_core_model(const fbstring_core_model &); ~fbstring_core_model(); @@ -218,7 +244,7 @@ public: // e.g. reference-counted implementation to fork the data. The // pointer is guaranteed to be valid until the next call to a // non-const member function. - Char * mutable_data(); + Char* mutableData(); // Returns a pointer to string's buffer and guarantees that a // readable '\0' lies right after the buffer. The pointer is // guaranteed to be valid until the next call to a non-const member @@ -229,10 +255,14 @@ public: void shrink(size_t delta); // Expands the string by delta characters (i.e. after this call // size() will report the old size() plus delta) but without - // initializing the expanded region. Returns a pointer to the memory - // to be initialized (the beginning of the expanded portion). The - // caller is expected to fill the expanded area appropriately. - Char* expand_noinit(size_t delta); + // initializing the expanded region. The expanded region is + // zero-terminated. Returns a pointer to the memory to be + // initialized (the beginning of the expanded portion). The caller + // is expected to fill the expanded area appropriately. + // If expGrowth is true, exponential growth is guaranteed. + // It is not guaranteed not to reallocate even if size() + delta < + // capacity(), so all references to the buffer are invalidated. + Char* expandNoinit(size_t delta, bool expGrowth); // Expands the string by one character and sets the last character // to c. void push_back(Char c); @@ -250,25 +280,16 @@ public: // the string without reallocation. For reference-counted strings, // it should fork the data even if minCapacity < size(). void reserve(size_t minCapacity); -private: + private: // Do not implement fbstring_core_model& operator=(const fbstring_core_model &); }; */ -/** - * gcc-4.7 throws what appears to be some false positive uninitialized - * warnings for the members of the MediumLarge struct. So, mute them here. - */ -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wuninitialized" -#endif - /** * This is the core of the string. The code should work on 32- and - * 64-bit architectures and with any Char size. Porting to big endian - * architectures would require some changes. + * 64-bit and both big- and little-endianan architectures with any + * Char size. * * The storage is selected as follows (assuming we store one-byte * characters on a 64-bit machine): (a) "small" strings between 0 and @@ -280,162 +301,79 @@ private: * reference-counted and copied lazily. the reference count is * allocated right before the character array. * - * The discriminator between these three strategies sits in the two - * most significant bits of the rightmost char of the storage. If - * neither is set, then the string is small (and its length sits in - * the lower-order bits of that rightmost character). If the MSb is - * set, the string is medium width. If the second MSb is set, then the - * string is large. + * The discriminator between these three strategies sits in two + * bits of the rightmost char of the storage. If neither is set, then the + * string is small (and its length sits in the lower-order bits on + * little-endian or the high-order bits on big-endian of that + * rightmost character). If the MSb is set, the string is medium width. + * If the second MSb is set, then the string is large. On little-endian, + * these 2 bits are the 2 MSbs of MediumLarge::capacity_, while on + * big-endian, these 2 bits are the 2 LSbs. This keeps both little-endian + * and big-endian fbstring_core equivalent with merely different ops used + * to extract capacity/category. */ template class fbstring_core { -public: - fbstring_core() noexcept { - // Only initialize the tag, will set the MSBs (i.e. the small - // string size) to zero too - ml_.capacity_ = maxSmallSize << (8 * (sizeof(size_t) - sizeof(Char))); - // or: setSmallSize(0); - writeTerminator(); - assert(category() == isSmall && size() == 0); - } + protected: +// It's MSVC, so we just have to guess ... and allow an override +#ifdef _MSC_VER +# ifdef FOLLY_ENDIAN_BE + static constexpr auto kIsLittleEndian = false; +# else + static constexpr auto kIsLittleEndian = true; +# endif +#else + static constexpr auto kIsLittleEndian = + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; +#endif + public: + fbstring_core() noexcept { reset(); } fbstring_core(const fbstring_core & rhs) { - assert(&rhs != this); - // Simplest case first: small strings are bitblitted - if (rhs.category() == isSmall) { - static_assert(offsetof(MediumLarge, data_) == 0, - "fbstring layout failure"); - static_assert(offsetof(MediumLarge, size_) == sizeof(ml_.data_), - "fbstring layout failure"); - static_assert(offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_), - "fbstring layout failure"); - const size_t size = rhs.smallSize(); - if (size == 0) { - ml_.capacity_ = rhs.ml_.capacity_; - writeTerminator(); - } else { - // Just write the whole thing, don't look at details. In - // particular we need to copy capacity anyway because we want - // to set the size (don't forget that the last character, - // which stores a short string's length, is shared with the - // ml_.capacity field). - ml_ = rhs.ml_; - } - assert(category() == isSmall && this->size() == rhs.size()); - } else if (rhs.category() == isLarge) { - // Large strings are just refcounted - ml_ = rhs.ml_; - RefCounted::incrementRefs(ml_.data_); - assert(category() == isLarge && size() == rhs.size()); - } else { - // Medium strings are copied eagerly. Don't forget to allocate - // one extra Char for the null terminator. - auto const allocSize = - goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char)); - ml_.data_ = static_cast(checkedMalloc(allocSize)); - fbstring_detail::pod_copy(rhs.ml_.data_, - // 1 for terminator - rhs.ml_.data_ + rhs.ml_.size_ + 1, - ml_.data_); - // No need for writeTerminator() here, we copied one extra - // element just above. - ml_.size_ = rhs.ml_.size_; - ml_.capacity_ = (allocSize / sizeof(Char) - 1) | isMedium; - assert(category() == isMedium); + FBSTRING_ASSERT(&rhs != this); + switch (rhs.category()) { + case Category::isSmall: + copySmall(rhs); + break; + case Category::isMedium: + copyMedium(rhs); + break; + case Category::isLarge: + copyLarge(rhs); + break; + default: + fbstring_detail::assume_unreachable(); } - assert(size() == rhs.size()); - assert(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0); + FBSTRING_ASSERT(size() == rhs.size()); + FBSTRING_ASSERT(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0); } fbstring_core(fbstring_core&& goner) noexcept { - if (goner.category() == isSmall) { - // Just copy, leave the goner in peace - new(this) fbstring_core(goner.small_, goner.smallSize()); - } else { - // Take goner's guts - ml_ = goner.ml_; - // Clean goner's carcass - goner.setSmallSize(0); - } + // Take goner's guts + ml_ = goner.ml_; + // Clean goner's carcass + goner.reset(); } - // NOTE(agallagher): The word-aligned copy path copies bytes which are - // outside the range of the string, and makes address sanitizer unhappy, - // so just disable it on this function. - fbstring_core(const Char *const data, const size_t size) - FBSTRING_DISABLE_ADDRESS_SANITIZER { -#ifndef NDEBUG -#ifndef _LIBSTDCXX_FBSTRING - SCOPE_EXIT { - assert(this->size() == size); - assert(memcmp(this->data(), data, size * sizeof(Char)) == 0); - }; -#endif -#endif - - // Simplest case first: small strings are bitblitted - if (size <= maxSmallSize) { - // Layout is: Char* data_, size_t size_, size_t capacity_ - static_assert(sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t), - "fbstring has unexpected size"); - static_assert(sizeof(Char*) == sizeof(size_t), - "fbstring size assumption violation"); - // sizeof(size_t) must be a power of 2 - static_assert((sizeof(size_t) & (sizeof(size_t) - 1)) == 0, - "fbstring size assumption violation"); - - // If data is aligned, use fast word-wise copying. Otherwise, - // use conservative memcpy. - if (reinterpret_cast(data) & (sizeof(size_t) - 1)) { - fbstring_detail::pod_copy(data, data + size, small_); - } else { - // Copy one word (64 bits) at a time - const size_t byteSize = size * sizeof(Char); - if (byteSize > 2 * sizeof(size_t)) { - // Copy three words - ml_.capacity_ = reinterpret_cast(data)[2]; - copyTwo: - ml_.size_ = reinterpret_cast(data)[1]; - copyOne: - ml_.data_ = *reinterpret_cast(const_cast(data)); - } else if (byteSize > sizeof(size_t)) { - // Copy two words - goto copyTwo; - } else if (size > 0) { - // Copy one word - goto copyOne; - } - } - setSmallSize(size); - return; + fbstring_core(const Char *const data, + const size_t size, + bool disableSSO = FBSTRING_DISABLE_SSO) { + if (!disableSSO && size <= maxSmallSize) { + initSmall(data, size); } else if (size <= maxMediumSize) { - // Medium strings are allocated normally. Don't forget to - // allocate one extra Char for the terminating null. - auto const allocSize = goodMallocSize((1 + size) * sizeof(Char)); - ml_.data_ = static_cast(checkedMalloc(allocSize)); - fbstring_detail::pod_copy(data, data + size, ml_.data_); - ml_.size_ = size; - ml_.capacity_ = (allocSize / sizeof(Char) - 1) | isMedium; + initMedium(data, size); } else { - // Large strings are allocated differently - size_t effectiveCapacity = size; - auto const newRC = RefCounted::create(data, & effectiveCapacity); - ml_.data_ = newRC->data_; - ml_.size_ = size; - ml_.capacity_ = effectiveCapacity | isLarge; + initLarge(data, size); } - writeTerminator(); + FBSTRING_ASSERT(this->size() == size); + FBSTRING_ASSERT( + size == 0 || memcmp(this->data(), data, size * sizeof(Char)) == 0); } ~fbstring_core() noexcept { - auto const c = category(); - if (c == isSmall) { + if (category() == Category::isSmall) { return; } - if (c == isMedium) { - free(ml_.data_); - return; - } - RefCounted::decrementRefs(ml_.data_); + destroyMediumLarge(); } // Snatches a previously mallocated string. The parameter "size" @@ -450,17 +388,17 @@ public: const size_t allocatedSize, AcquireMallocatedString) { if (size > 0) { - assert(allocatedSize >= size + 1); - assert(data[size] == '\0'); + FBSTRING_ASSERT(allocatedSize >= size + 1); + FBSTRING_ASSERT(data[size] == '\0'); // Use the medium string storage ml_.data_ = data; ml_.size_ = size; // Don't forget about null terminator - ml_.capacity_ = (allocatedSize - 1) | isMedium; + ml_.setCapacity(allocatedSize - 1, Category::isMedium); } else { // No need for the memory free(data); - setSmallSize(0); + reset(); } } @@ -479,265 +417,131 @@ public: return c_str(); } - Char * mutable_data() { - auto const c = category(); - if (c == isSmall) { + Char* mutableData() { + switch (category()) { + case Category::isSmall: return small_; + case Category::isMedium: + return ml_.data_; + case Category::isLarge: + return mutableDataLarge(); } - assert(c == isMedium || c == isLarge); - if (c == isLarge && RefCounted::refs(ml_.data_) > 1) { - // Ensure unique. - size_t effectiveCapacity = ml_.capacity(); - auto const newRC = RefCounted::create(& effectiveCapacity); - // If this fails, someone placed the wrong capacity in an - // fbstring. - assert(effectiveCapacity >= ml_.capacity()); - fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, - newRC->data_); - RefCounted::decrementRefs(ml_.data_); - ml_.data_ = newRC->data_; - // No need to call writeTerminator(), we have + 1 above. - } - return ml_.data_; + fbstring_detail::assume_unreachable(); } - const Char * c_str() const { - auto const c = category(); - if (c == isSmall) { - assert(small_[smallSize()] == '\0'); - return small_; - } - assert(c == isMedium || c == isLarge); - assert(ml_.data_[ml_.size_] == '\0'); - return ml_.data_; + const Char* c_str() const { + const Char* ptr = ml_.data_; + // With this syntax, GCC and Clang generate a CMOV instead of a branch. + ptr = (category() == Category::isSmall) ? small_ : ptr; + return ptr; } void shrink(const size_t delta) { - if (category() == isSmall) { - // Check for underflow - assert(delta <= smallSize()); - setSmallSize(smallSize() - delta); - } else if (category() == isMedium || RefCounted::refs(ml_.data_) == 1) { - // Medium strings and unique large strings need no special - // handling. - assert(ml_.size_ >= delta); - ml_.size_ -= delta; - writeTerminator(); + if (category() == Category::isSmall) { + shrinkSmall(delta); + } else if (category() == Category::isMedium || + RefCounted::refs(ml_.data_) == 1) { + shrinkMedium(delta); } else { - assert(ml_.size_ >= delta); - // Shared large string, must make unique. This is because of the - // durn terminator must be written, which may trample the shared - // data. - if (delta) { - fbstring_core(ml_.data_, ml_.size_ - delta).swap(*this); - } - // No need to write the terminator. + shrinkLarge(delta); } } - void reserve(size_t minCapacity) { - if (category() == isLarge) { - // Ensure unique - if (RefCounted::refs(ml_.data_) > 1) { - // We must make it unique regardless; in-place reallocation is - // useless if the string is shared. In order to not surprise - // people, reserve the new block at current capacity or - // more. That way, a string's capacity never shrinks after a - // call to reserve. - minCapacity = std::max(minCapacity, ml_.capacity()); - auto const newRC = RefCounted::create(& minCapacity); - fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, - newRC->data_); - // Done with the old data. No need to call writeTerminator(), - // we have + 1 above. - RefCounted::decrementRefs(ml_.data_); - ml_.data_ = newRC->data_; - ml_.capacity_ = minCapacity | isLarge; - // size remains unchanged - } else { - // String is not shared, so let's try to realloc (if needed) - if (minCapacity > ml_.capacity()) { - // Asking for more memory - auto const newRC = - RefCounted::reallocate(ml_.data_, ml_.size_, - ml_.capacity(), minCapacity); - ml_.data_ = newRC->data_; - ml_.capacity_ = minCapacity | isLarge; - writeTerminator(); - } - assert(capacity() >= minCapacity); - } - } else if (category() == isMedium) { - // String is not shared - if (minCapacity <= ml_.capacity()) { - return; // nothing to do, there's enough room - } - if (minCapacity <= maxMediumSize) { - // Keep the string at medium size. Don't forget to allocate - // one extra Char for the terminating null. - size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); - ml_.data_ = static_cast( - smartRealloc( - ml_.data_, - ml_.size_ * sizeof(Char), - (ml_.capacity() + 1) * sizeof(Char), - capacityBytes)); - writeTerminator(); - ml_.capacity_ = (capacityBytes / sizeof(Char) - 1) | isMedium; - } else { - // Conversion from medium to large string - fbstring_core nascent; - // Will recurse to another branch of this function - nascent.reserve(minCapacity); - nascent.ml_.size_ = ml_.size_; - fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_, - nascent.ml_.data_); - nascent.swap(*this); - writeTerminator(); - assert(capacity() >= minCapacity); - } - } else { - assert(category() == isSmall); - if (minCapacity > maxMediumSize) { - // large - auto const newRC = RefCounted::create(& minCapacity); - auto const size = smallSize(); - fbstring_detail::pod_copy(small_, small_ + size + 1, newRC->data_); - // No need for writeTerminator(), we wrote it above with + 1. - ml_.data_ = newRC->data_; - ml_.size_ = size; - ml_.capacity_ = minCapacity | isLarge; - assert(capacity() >= minCapacity); - } else if (minCapacity > maxSmallSize) { - // medium - // Don't forget to allocate one extra Char for the terminating null - auto const allocSizeBytes = - goodMallocSize((1 + minCapacity) * sizeof(Char)); - auto const data = static_cast(checkedMalloc(allocSizeBytes)); - auto const size = smallSize(); - fbstring_detail::pod_copy(small_, small_ + size + 1, data); - // No need for writeTerminator(), we wrote it above with + 1. - ml_.data_ = data; - ml_.size_ = size; - ml_.capacity_ = (allocSizeBytes / sizeof(Char) - 1) | isMedium; - } else { - // small - // Nothing to do, everything stays put - } - } - assert(capacity() >= minCapacity); - } - - Char * expand_noinit(const size_t delta) { - // Strategy is simple: make room, then change size - assert(capacity() >= size()); - size_t sz, newSz; - if (category() == isSmall) { - sz = smallSize(); - newSz = sz + delta; - if (newSz <= maxSmallSize) { - setSmallSize(newSz); - return small_ + sz; - } - reserve(newSz); - } else { - sz = ml_.size_; - newSz = ml_.size_ + delta; - if (newSz > capacity()) { - reserve(newSz); - } + FOLLY_MALLOC_NOINLINE + void reserve(size_t minCapacity, bool disableSSO = FBSTRING_DISABLE_SSO) { + switch (category()) { + case Category::isSmall: + reserveSmall(minCapacity, disableSSO); + break; + case Category::isMedium: + reserveMedium(minCapacity); + break; + case Category::isLarge: + reserveLarge(minCapacity); + break; + default: + fbstring_detail::assume_unreachable(); } - assert(capacity() >= newSz); - // Category can't be small - we took care of that above - assert(category() == isMedium || category() == isLarge); - ml_.size_ = newSz; - writeTerminator(); - assert(size() == newSz); - return ml_.data_ + sz; + FBSTRING_ASSERT(capacity() >= minCapacity); } + Char* expandNoinit( + const size_t delta, + bool expGrowth = false, + bool disableSSO = FBSTRING_DISABLE_SSO); + void push_back(Char c) { - assert(capacity() >= size()); - size_t sz; - if (category() == isSmall) { - sz = smallSize(); - if (sz < maxSmallSize) { - small_[sz] = c; - setSmallSize(sz + 1); - return; - } - reserve(maxSmallSize * 2); - } else { - sz = ml_.size_; - if (sz == capacity()) { // always true for isShared() - reserve(1 + sz * 3 / 2); // ensures not shared - } - } - assert(!isShared()); - assert(capacity() >= sz + 1); - // Category can't be small - we took care of that above - assert(category() == isMedium || category() == isLarge); - ml_.size_ = sz + 1; - ml_.data_[sz] = c; - writeTerminator(); + *expandNoinit(1, /* expGrowth = */ true) = c; } size_t size() const { - return category() == isSmall ? smallSize() : ml_.size_; + size_t ret = ml_.size_; + /* static */ if (kIsLittleEndian) { + // We can save a couple instructions, because the category is + // small iff the last char, as unsigned, is <= maxSmallSize. + typedef typename std::make_unsigned::type UChar; + auto maybeSmallSize = size_t(maxSmallSize) - + size_t(static_cast(small_[maxSmallSize])); + // With this syntax, GCC and Clang generate a CMOV instead of a branch. + ret = (static_cast(maybeSmallSize) >= 0) ? maybeSmallSize : ret; + } else { + ret = (category() == Category::isSmall) ? smallSize() : ret; + } + return ret; } size_t capacity() const { switch (category()) { - case isSmall: + case Category::isSmall: return maxSmallSize; - case isLarge: + case Category::isLarge: // For large-sized strings, a multi-referenced chunk has no // available capacity. This is because any attempt to append // data would trigger a new allocation. - if (RefCounted::refs(ml_.data_) > 1) return ml_.size_; - default: {} + if (RefCounted::refs(ml_.data_) > 1) { + return ml_.size_; + } + break; + default: + break; } return ml_.capacity(); } bool isShared() const { - return category() == isLarge && RefCounted::refs(ml_.data_) > 1; - } - - void writeTerminator() { - if (category() == isSmall) { - const auto s = smallSize(); - if (s != maxSmallSize) { - small_[s] = '\0'; - } - } else { - ml_.data_[ml_.size_] = '\0'; - } + return category() == Category::isLarge && RefCounted::refs(ml_.data_) > 1; } -private: + private: // Disabled fbstring_core & operator=(const fbstring_core & rhs); - struct MediumLarge { - Char * data_; - size_t size_; - size_t capacity_; + void reset() { + setSmallSize(0); + } - size_t capacity() const { - return capacity_ & capacityExtractMask; + FOLLY_MALLOC_NOINLINE void destroyMediumLarge() noexcept { + auto const c = category(); + FBSTRING_ASSERT(c != Category::isSmall); + if (c == Category::isMedium) { + free(ml_.data_); + } else { + RefCounted::decrementRefs(ml_.data_); } - }; + } struct RefCounted { std::atomic refCount_; Char data_[1]; + constexpr static size_t getDataOffset() { + return offsetof(RefCounted, data_); + } + static RefCounted * fromData(Char * p) { - return static_cast( - static_cast( - static_cast(static_cast(p)) - - sizeof(refCount_))); + return static_cast(static_cast( + static_cast(static_cast(p)) - + getDataOffset())); } static size_t refs(Char * p) { @@ -751,99 +555,432 @@ private: static void decrementRefs(Char * p) { auto const dis = fromData(p); size_t oldcnt = dis->refCount_.fetch_sub(1, std::memory_order_acq_rel); - assert(oldcnt > 0); + FBSTRING_ASSERT(oldcnt > 0); if (oldcnt == 1) { free(dis); } } static RefCounted * create(size_t * size) { - // Don't forget to allocate one extra Char for the terminating - // null. In this case, however, one Char is already part of the - // struct. - const size_t allocSize = goodMallocSize( - sizeof(RefCounted) + *size * sizeof(Char)); + const size_t allocSize = + goodMallocSize(getDataOffset() + (*size + 1) * sizeof(Char)); auto result = static_cast(checkedMalloc(allocSize)); result->refCount_.store(1, std::memory_order_release); - *size = (allocSize - sizeof(RefCounted)) / sizeof(Char); + *size = (allocSize - getDataOffset()) / sizeof(Char) - 1; return result; } static RefCounted * create(const Char * data, size_t * size) { const size_t effectiveSize = *size; auto result = create(size); - fbstring_detail::pod_copy(data, data + effectiveSize, result->data_); + if (FBSTRING_LIKELY(effectiveSize > 0)) { + fbstring_detail::podCopy(data, data + effectiveSize, result->data_); + } return result; } static RefCounted * reallocate(Char *const data, const size_t currentSize, const size_t currentCapacity, - const size_t newCapacity) { - assert(newCapacity > 0 && newCapacity > currentSize); + size_t * newCapacity) { + FBSTRING_ASSERT(*newCapacity > 0 && *newCapacity > currentSize); + const size_t allocNewCapacity = + goodMallocSize(getDataOffset() + (*newCapacity + 1) * sizeof(Char)); auto const dis = fromData(data); - assert(dis->refCount_.load(std::memory_order_acquire) == 1); - // Don't forget to allocate one extra Char for the terminating - // null. In this case, however, one Char is already part of the - // struct. - auto result = static_cast( - smartRealloc(dis, - sizeof(RefCounted) + currentSize * sizeof(Char), - sizeof(RefCounted) + currentCapacity * sizeof(Char), - sizeof(RefCounted) + newCapacity * sizeof(Char))); - assert(result->refCount_.load(std::memory_order_acquire) == 1); + FBSTRING_ASSERT(dis->refCount_.load(std::memory_order_acquire) == 1); + auto result = static_cast(smartRealloc( + dis, + getDataOffset() + (currentSize + 1) * sizeof(Char), + getDataOffset() + (currentCapacity + 1) * sizeof(Char), + allocNewCapacity)); + FBSTRING_ASSERT(result->refCount_.load(std::memory_order_acquire) == 1); + *newCapacity = (allocNewCapacity - getDataOffset()) / sizeof(Char) - 1; return result; } }; - union { - mutable Char small_[sizeof(MediumLarge) / sizeof(Char)]; - mutable MediumLarge ml_; - }; - - enum { - lastChar = sizeof(MediumLarge) - 1, - maxSmallSize = lastChar / sizeof(Char), - maxMediumSize = 254 / sizeof(Char), // coincides with the small - // bin size in dlmalloc - categoryExtractMask = sizeof(size_t) == 4 ? 0xC0000000 : 0xC000000000000000, - capacityExtractMask = ~categoryExtractMask, - }; - static_assert(!(sizeof(MediumLarge) % sizeof(Char)), - "Corrupt memory layout for fbstring."); + typedef uint8_t category_type; - enum Category { + enum class Category : category_type { isSmall = 0, - isMedium = sizeof(size_t) == 4 ? 0x80000000 : 0x8000000000000000, - isLarge = sizeof(size_t) == 4 ? 0x40000000 : 0x4000000000000000, + isMedium = kIsLittleEndian ? 0x80 : 0x2, + isLarge = kIsLittleEndian ? 0x40 : 0x1, }; Category category() const { - // Assumes little endian - return static_cast(ml_.capacity_ & categoryExtractMask); + // works for both big-endian and little-endian + return static_cast(bytes_[lastChar] & categoryExtractMask); } + struct MediumLarge { + Char * data_; + size_t size_; + size_t capacity_; + + size_t capacity() const { + return kIsLittleEndian + ? capacity_ & capacityExtractMask + : capacity_ >> 2; + } + + void setCapacity(size_t cap, Category cat) { + capacity_ = kIsLittleEndian + ? cap | (static_cast(cat) << kCategoryShift) + : (cap << 2) | static_cast(cat); + } + }; + + union { + uint8_t bytes_[sizeof(MediumLarge)]; // For accessing the last byte. + Char small_[sizeof(MediumLarge) / sizeof(Char)]; + MediumLarge ml_; + }; + + constexpr static size_t lastChar = sizeof(MediumLarge) - 1; + constexpr static size_t maxSmallSize = lastChar / sizeof(Char); + constexpr static size_t maxMediumSize = 254 / sizeof(Char); + constexpr static uint8_t categoryExtractMask = kIsLittleEndian ? 0xC0 : 0x3; + constexpr static size_t kCategoryShift = (sizeof(size_t) - 1) * 8; + constexpr static size_t capacityExtractMask = kIsLittleEndian + ? ~(size_t(categoryExtractMask) << kCategoryShift) + : 0x0 /* unused */; + + static_assert(!(sizeof(MediumLarge) % sizeof(Char)), + "Corrupt memory layout for fbstring."); + size_t smallSize() const { - assert(category() == isSmall && - static_cast(small_[maxSmallSize]) - <= static_cast(maxSmallSize)); - return static_cast(maxSmallSize) - - static_cast(small_[maxSmallSize]); + FBSTRING_ASSERT(category() == Category::isSmall); + constexpr auto shift = kIsLittleEndian ? 0 : 2; + auto smallShifted = static_cast(small_[maxSmallSize]) >> shift; + FBSTRING_ASSERT(static_cast(maxSmallSize) >= smallShifted); + return static_cast(maxSmallSize) - smallShifted; } void setSmallSize(size_t s) { // Warning: this should work with uninitialized strings too, // so don't assume anything about the previous value of // small_[maxSmallSize]. - assert(s <= maxSmallSize); - small_[maxSmallSize] = maxSmallSize - s; - writeTerminator(); + FBSTRING_ASSERT(s <= maxSmallSize); + constexpr auto shift = kIsLittleEndian ? 0 : 2; + small_[maxSmallSize] = char((maxSmallSize - s) << shift); + small_[s] = '\0'; + FBSTRING_ASSERT(category() == Category::isSmall && size() == s); } + + void copySmall(const fbstring_core&); + void copyMedium(const fbstring_core&); + void copyLarge(const fbstring_core&); + + void initSmall(const Char* data, size_t size); + void initMedium(const Char* data, size_t size); + void initLarge(const Char* data, size_t size); + + void reserveSmall(size_t minCapacity, bool disableSSO); + void reserveMedium(size_t minCapacity); + void reserveLarge(size_t minCapacity); + + void shrinkSmall(size_t delta); + void shrinkMedium(size_t delta); + void shrinkLarge(size_t delta); + + void unshare(size_t minCapacity = 0); + Char* mutableDataLarge(); }; -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic pop +template +inline void fbstring_core::copySmall(const fbstring_core& rhs) { + static_assert(offsetof(MediumLarge, data_) == 0, "fbstring layout failure"); + static_assert( + offsetof(MediumLarge, size_) == sizeof(ml_.data_), + "fbstring layout failure"); + static_assert( + offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_), + "fbstring layout failure"); + // Just write the whole thing, don't look at details. In + // particular we need to copy capacity anyway because we want + // to set the size (don't forget that the last character, + // which stores a short string's length, is shared with the + // ml_.capacity field). + ml_ = rhs.ml_; + FBSTRING_ASSERT( + category() == Category::isSmall && this->size() == rhs.size()); +} + +template +FOLLY_MALLOC_NOINLINE inline void fbstring_core::copyMedium( + const fbstring_core& rhs) { + // Medium strings are copied eagerly. Don't forget to allocate + // one extra Char for the null terminator. + auto const allocSize = goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char)); + ml_.data_ = static_cast(checkedMalloc(allocSize)); + // Also copies terminator. + fbstring_detail::podCopy( + rhs.ml_.data_, rhs.ml_.data_ + rhs.ml_.size_ + 1, ml_.data_); + ml_.size_ = rhs.ml_.size_; + ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); + FBSTRING_ASSERT(category() == Category::isMedium); +} + +template +FOLLY_MALLOC_NOINLINE inline void fbstring_core::copyLarge( + const fbstring_core& rhs) { + // Large strings are just refcounted + ml_ = rhs.ml_; + RefCounted::incrementRefs(ml_.data_); + FBSTRING_ASSERT(category() == Category::isLarge && size() == rhs.size()); +} + +// Small strings are bitblitted +template +inline void fbstring_core::initSmall( + const Char* const data, const size_t size) { + // Layout is: Char* data_, size_t size_, size_t capacity_ + static_assert( + sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t), + "fbstring has unexpected size"); + static_assert( + sizeof(Char*) == sizeof(size_t), "fbstring size assumption violation"); + // sizeof(size_t) must be a power of 2 + static_assert( + (sizeof(size_t) & (sizeof(size_t) - 1)) == 0, + "fbstring size assumption violation"); + +// If data is aligned, use fast word-wise copying. Otherwise, +// use conservative memcpy. +// The word-wise path reads bytes which are outside the range of +// the string, and makes ASan unhappy, so we disable it when +// compiling with ASan. +#ifndef FBSTRING_SANITIZE_ADDRESS + if ((reinterpret_cast(data) & (sizeof(size_t) - 1)) == 0) { + const size_t byteSize = size * sizeof(Char); + constexpr size_t wordWidth = sizeof(size_t); + switch ((byteSize + wordWidth - 1) / wordWidth) { // Number of words. + case 3: + ml_.capacity_ = reinterpret_cast(data)[2]; + FOLLY_FALLTHROUGH; + case 2: + ml_.size_ = reinterpret_cast(data)[1]; + FOLLY_FALLTHROUGH; + case 1: + ml_.data_ = *reinterpret_cast(const_cast(data)); + FOLLY_FALLTHROUGH; + case 0: + break; + } + } else #endif + { + if (size != 0) { + fbstring_detail::podCopy(data, data + size, small_); + } + } + setSmallSize(size); +} + +template +FOLLY_MALLOC_NOINLINE inline void fbstring_core::initMedium( + const Char* const data, const size_t size) { + // Medium strings are allocated normally. Don't forget to + // allocate one extra Char for the terminating null. + auto const allocSize = goodMallocSize((1 + size) * sizeof(Char)); + ml_.data_ = static_cast(checkedMalloc(allocSize)); + if (FBSTRING_LIKELY(size > 0)) { + fbstring_detail::podCopy(data, data + size, ml_.data_); + } + ml_.size_ = size; + ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); + ml_.data_[size] = '\0'; +} + +template +FOLLY_MALLOC_NOINLINE inline void fbstring_core::initLarge( + const Char* const data, const size_t size) { + // Large strings are allocated differently + size_t effectiveCapacity = size; + auto const newRC = RefCounted::create(data, &effectiveCapacity); + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.setCapacity(effectiveCapacity, Category::isLarge); + ml_.data_[size] = '\0'; +} + +template +FOLLY_MALLOC_NOINLINE inline void fbstring_core::unshare( + size_t minCapacity) { + FBSTRING_ASSERT(category() == Category::isLarge); + size_t effectiveCapacity = std::max(minCapacity, ml_.capacity()); + auto const newRC = RefCounted::create(&effectiveCapacity); + // If this fails, someone placed the wrong capacity in an + // fbstring. + FBSTRING_ASSERT(effectiveCapacity >= ml_.capacity()); + // Also copies terminator. + fbstring_detail::podCopy(ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_); + RefCounted::decrementRefs(ml_.data_); + ml_.data_ = newRC->data_; + ml_.setCapacity(effectiveCapacity, Category::isLarge); + // size_ remains unchanged. +} + +template +inline Char* fbstring_core::mutableDataLarge() { + FBSTRING_ASSERT(category() == Category::isLarge); + if (RefCounted::refs(ml_.data_) > 1) { // Ensure unique. + unshare(); + } + return ml_.data_; +} + +template +FOLLY_MALLOC_NOINLINE inline void fbstring_core::reserveLarge( + size_t minCapacity) { + FBSTRING_ASSERT(category() == Category::isLarge); + if (RefCounted::refs(ml_.data_) > 1) { // Ensure unique + // We must make it unique regardless; in-place reallocation is + // useless if the string is shared. In order to not surprise + // people, reserve the new block at current capacity or + // more. That way, a string's capacity never shrinks after a + // call to reserve. + unshare(minCapacity); + } else { + // String is not shared, so let's try to realloc (if needed) + if (minCapacity > ml_.capacity()) { + // Asking for more memory + auto const newRC = RefCounted::reallocate( + ml_.data_, ml_.size_, ml_.capacity(), &minCapacity); + ml_.data_ = newRC->data_; + ml_.setCapacity(minCapacity, Category::isLarge); + } + FBSTRING_ASSERT(capacity() >= minCapacity); + } +} + +template +FOLLY_MALLOC_NOINLINE inline void fbstring_core::reserveMedium( + const size_t minCapacity) { + FBSTRING_ASSERT(category() == Category::isMedium); + // String is not shared + if (minCapacity <= ml_.capacity()) { + return; // nothing to do, there's enough room + } + if (minCapacity <= maxMediumSize) { + // Keep the string at medium size. Don't forget to allocate + // one extra Char for the terminating null. + size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); + // Also copies terminator. + ml_.data_ = static_cast(smartRealloc( + ml_.data_, + (ml_.size_ + 1) * sizeof(Char), + (ml_.capacity() + 1) * sizeof(Char), + capacityBytes)); + ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium); + } else { + // Conversion from medium to large string + fbstring_core nascent; + // Will recurse to another branch of this function + nascent.reserve(minCapacity); + nascent.ml_.size_ = ml_.size_; + // Also copies terminator. + fbstring_detail::podCopy( + ml_.data_, ml_.data_ + ml_.size_ + 1, nascent.ml_.data_); + nascent.swap(*this); + FBSTRING_ASSERT(capacity() >= minCapacity); + } +} + +template +FOLLY_MALLOC_NOINLINE inline void fbstring_core::reserveSmall( + size_t minCapacity, const bool disableSSO) { + FBSTRING_ASSERT(category() == Category::isSmall); + if (!disableSSO && minCapacity <= maxSmallSize) { + // small + // Nothing to do, everything stays put + } else if (minCapacity <= maxMediumSize) { + // medium + // Don't forget to allocate one extra Char for the terminating null + auto const allocSizeBytes = + goodMallocSize((1 + minCapacity) * sizeof(Char)); + auto const pData = static_cast(checkedMalloc(allocSizeBytes)); + auto const size = smallSize(); + // Also copies terminator. + fbstring_detail::podCopy(small_, small_ + size + 1, pData); + ml_.data_ = pData; + ml_.size_ = size; + ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium); + } else { + // large + auto const newRC = RefCounted::create(&minCapacity); + auto const size = smallSize(); + // Also copies terminator. + fbstring_detail::podCopy(small_, small_ + size + 1, newRC->data_); + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.setCapacity(minCapacity, Category::isLarge); + FBSTRING_ASSERT(capacity() >= minCapacity); + } +} + +template +inline Char* fbstring_core::expandNoinit( + const size_t delta, + bool expGrowth, /* = false */ + bool disableSSO /* = FBSTRING_DISABLE_SSO */) { + // Strategy is simple: make room, then change size + FBSTRING_ASSERT(capacity() >= size()); + size_t sz, newSz; + if (category() == Category::isSmall) { + sz = smallSize(); + newSz = sz + delta; + if (!disableSSO && FBSTRING_LIKELY(newSz <= maxSmallSize)) { + setSmallSize(newSz); + return small_ + sz; + } + reserveSmall( + expGrowth ? std::max(newSz, 2 * maxSmallSize) : newSz, disableSSO); + } else { + sz = ml_.size_; + newSz = sz + delta; + if (FBSTRING_UNLIKELY(newSz > capacity())) { + // ensures not shared + reserve(expGrowth ? std::max(newSz, 1 + capacity() * 3 / 2) : newSz); + } + } + FBSTRING_ASSERT(capacity() >= newSz); + // Category can't be small - we took care of that above + FBSTRING_ASSERT( + category() == Category::isMedium || category() == Category::isLarge); + ml_.size_ = newSz; + ml_.data_[newSz] = '\0'; + FBSTRING_ASSERT(size() == newSz); + return ml_.data_ + sz; +} + +template +inline void fbstring_core::shrinkSmall(const size_t delta) { + // Check for underflow + FBSTRING_ASSERT(delta <= smallSize()); + setSmallSize(smallSize() - delta); +} + +template +inline void fbstring_core::shrinkMedium(const size_t delta) { + // Medium strings and unique large strings need no special + // handling. + FBSTRING_ASSERT(ml_.size_ >= delta); + ml_.size_ -= delta; + ml_.data_[ml_.size_] = '\0'; +} + +template +inline void fbstring_core::shrinkLarge(const size_t delta) { + FBSTRING_ASSERT(ml_.size_ >= delta); + // Shared large string, must make unique. This is because of the + // durn terminator must be written, which may trample the shared + // data. + if (delta) { + fbstring_core(ml_.data_, ml_.size_ - delta).swap(*this); + } + // No need to write the terminator. +} #ifndef _LIBSTDCXX_FBSTRING /** @@ -852,7 +989,7 @@ private: */ template class dummy_fbstring_core { -public: + public: dummy_fbstring_core() { } dummy_fbstring_core(const dummy_fbstring_core& another) @@ -867,15 +1004,14 @@ public: const Char * data() const { return backend_.data(); } - Char * mutable_data() { - //assert(!backend_.empty()); - return &*backend_.begin(); + Char* mutableData() { + return const_cast(backend_.data()); } void shrink(size_t delta) { - assert(delta <= size()); + FBSTRING_ASSERT(delta <= size()); backend_.resize(size() - delta); } - Char * expand_noinit(size_t delta) { + Char* expandNoinit(size_t delta) { auto const sz = size(); backend_.resize(size() + delta); return backend_.data() + sz; @@ -896,7 +1032,7 @@ public: backend_.reserve(minCapacity); } -private: + private: std::basic_string backend_; }; #endif // !_LIBSTDCXX_FBSTRING @@ -909,18 +1045,20 @@ private: #ifdef _LIBSTDCXX_FBSTRING template #else -template , - class A = std::allocator, - class Storage = fbstring_core > +template < + typename E, + class T = std::char_traits, + class A = std::allocator, + class Storage = fbstring_core> #endif class basic_fbstring { - static void enforce( bool condition, void (*throw_exc)(const char*), const char* msg) { - if (!condition) throw_exc(msg); + if (!condition) { + throw_exc(msg); + } } bool isSane() const { @@ -934,25 +1072,20 @@ class basic_fbstring { begin()[size()] == '\0'; } - struct Invariant; - friend struct Invariant; struct Invariant { -#ifndef NDEBUG - explicit Invariant(const basic_fbstring& s) : s_(s) { - assert(s_.isSane()); + Invariant& operator=(const Invariant&) = delete; + explicit Invariant(const basic_fbstring& s) noexcept : s_(s) { + FBSTRING_ASSERT(s_.isSane()); } - ~Invariant() { - assert(s_.isSane()); + ~Invariant() noexcept { + FBSTRING_ASSERT(s_.isSane()); } - private: + + private: const basic_fbstring& s_; -#else - explicit Invariant(const basic_fbstring&) {} -#endif - Invariant& operator=(const Invariant&); }; -public: + public: // types typedef T traits_type; typedef typename traits_type::char_type value_type; @@ -978,16 +1111,37 @@ public: #endif > const_reverse_iterator; - static const size_type npos; // = size_type(-1) + static constexpr size_type npos = size_type(-1); + typedef std::true_type IsRelocatable; -private: + private: static void procrustes(size_type& n, size_type nmax) { - if (n > nmax) n = nmax; + if (n > nmax) { + n = nmax; + } } -public: + static size_type traitsLength(const value_type* s); + + public: // C++11 21.4.2 construct/copy/destroy - explicit basic_fbstring(const A& a = A()) noexcept { + + // Note: while the following two constructors can be (and previously were) + // collapsed into one constructor written this way: + // + // explicit basic_fbstring(const A& a = A()) noexcept { } + // + // This can cause Clang (at least version 3.7) to fail with the error: + // "chosen constructor is explicit in copy-initialization ... + // in implicit initialization of field '(x)' with omitted initializer" + // + // if used in a struct which is default-initialized. Hence the split into + // these two separate constructors. + + basic_fbstring() noexcept : basic_fbstring(A()) { + } + + explicit basic_fbstring(const A&) noexcept { } basic_fbstring(const basic_fbstring& str) @@ -1001,47 +1155,47 @@ public: #ifndef _LIBSTDCXX_FBSTRING // This is defined for compatibility with std::string - /* implicit */ basic_fbstring(const std::string& str) - : store_(str.data(), str.size()) { - } + template + /* implicit */ basic_fbstring(const std::basic_string& str) + : store_(str.data(), str.size()) {} #endif - basic_fbstring(const basic_fbstring& str, size_type pos, - size_type n = npos, const A& a = A()) { + basic_fbstring(const basic_fbstring& str, + size_type pos, + size_type n = npos, + const A& /* a */ = A()) { assign(str, pos, n); } - /* implicit */ basic_fbstring(const value_type* s, const A& a = A()) - : store_(s, s - ? traits_type::length(s) - : [] { - std::__throw_logic_error( - "basic_fbstring: null pointer initializer not valid"); - return 0; - }()) { - } + FOLLY_MALLOC_NOINLINE + /* implicit */ basic_fbstring(const value_type* s, const A& /*a*/ = A()) + : store_(s, traitsLength(s)) {} - basic_fbstring(const value_type* s, size_type n, const A& a = A()) + FOLLY_MALLOC_NOINLINE + basic_fbstring(const value_type* s, size_type n, const A& /*a*/ = A()) : store_(s, n) { } - basic_fbstring(size_type n, value_type c, const A& a = A()) { - auto const data = store_.expand_noinit(n); - fbstring_detail::pod_fill(data, data + n, c); - store_.writeTerminator(); + FOLLY_MALLOC_NOINLINE + basic_fbstring(size_type n, value_type c, const A& /*a*/ = A()) { + auto const pData = store_.expandNoinit(n); + fbstring_detail::podFill(pData, pData + n, c); } template - basic_fbstring(InIt begin, InIt end, - typename std::enable_if< - !std::is_same::type, - value_type*>::value, const A>::type & a = A()) { + FOLLY_MALLOC_NOINLINE basic_fbstring( + InIt begin, + InIt end, + typename std::enable_if< + !std::is_same::value, + const A>::type& /*a*/ = A()) { assign(begin, end); } // Specialization for const char*, const char* - basic_fbstring(const value_type* b, const value_type* e) - : store_(b, e - b) { + FOLLY_MALLOC_NOINLINE + basic_fbstring(const value_type* b, const value_type* e, const A& /*a*/ = A()) + : store_(b, size_type(e - b)) { } // Nonstandard constructor @@ -1051,58 +1205,28 @@ public: } // Construction from initialization list + FOLLY_MALLOC_NOINLINE basic_fbstring(std::initializer_list il) { assign(il.begin(), il.end()); } - ~basic_fbstring() noexcept { - } + ~basic_fbstring() noexcept {} - basic_fbstring& operator=(const basic_fbstring& lhs) { - if (FBSTRING_UNLIKELY(&lhs == this)) { - return *this; - } - auto const oldSize = size(); - auto const srcSize = lhs.size(); - if (capacity() >= srcSize && !store_.isShared()) { - // great, just copy the contents - if (oldSize < srcSize) - store_.expand_noinit(srcSize - oldSize); - else - store_.shrink(oldSize - srcSize); - assert(size() == srcSize); - fbstring_detail::pod_copy(lhs.begin(), lhs.end(), begin()); - store_.writeTerminator(); - } else { - // need to reallocate, so we may as well create a brand new string - basic_fbstring(lhs).swap(*this); - } - return *this; - } + basic_fbstring& operator=(const basic_fbstring& lhs); // Move assignment - basic_fbstring& operator=(basic_fbstring&& goner) noexcept { - if (FBSTRING_UNLIKELY(&goner == this)) { - // Compatibility with std::basic_string<>, - // C++11 21.4.2 [string.cons] / 23 requires self-move-assignment support. - return *this; - } - // No need of this anymore - this->~basic_fbstring(); - // Move the goner into this - new(&store_) fbstring_core(std::move(goner.store_)); - return *this; - } + basic_fbstring& operator=(basic_fbstring&& goner) noexcept; #ifndef _LIBSTDCXX_FBSTRING // Compatibility with std::string - basic_fbstring & operator=(const std::string & rhs) { + template + basic_fbstring& operator=(const std::basic_string& rhs) { return assign(rhs.data(), rhs.size()); } // Compatibility with std::string - std::string toStdString() const { - return std::string(data(), size()); + std::basic_string toStdString() const { + return std::basic_string(data(), size()); } #else // A lot of code in fbcode still uses this method, so keep it here for now. @@ -1115,33 +1239,44 @@ public: return assign(s); } - basic_fbstring& operator=(value_type c) { - if (empty()) { - store_.expand_noinit(1); - } else if (store_.isShared()) { - basic_fbstring(1, c).swap(*this); - return *this; - } else { - store_.shrink(size() - 1); - } - *store_.mutable_data() = c; - store_.writeTerminator(); - return *this; - } + // This actually goes directly against the C++ spec, but the + // value_type overload is dangerous, so we're explicitly deleting + // any overloads of operator= that could implicitly convert to + // value_type. + // Note that we do need to explicitly specify the template types because + // otherwise MSVC 2017 will aggressively pre-resolve value_type to + // traits_type::char_type, which won't compare as equal when determining + // which overload the implementation is referring to. + // Also note that MSVC 2015 Update 3 requires us to explicitly specify the + // namespace in-which to search for basic_fbstring, otherwise it tries to + // look for basic_fbstring::basic_fbstring, which is just plain wrong. + template + typename std::enable_if< + std::is_same< + typename std::decay::type, + typename folly::basic_fbstring::value_type>::value, + basic_fbstring&>::type + operator=(TP c); basic_fbstring& operator=(std::initializer_list il) { return assign(il.begin(), il.end()); } // C++11 21.4.3 iterators: - iterator begin() { return store_.mutable_data(); } + iterator begin() { + return store_.mutableData(); + } - const_iterator begin() const { return store_.data(); } + const_iterator begin() const { + return store_.data(); + } - const_iterator cbegin() const { return begin(); } + const_iterator cbegin() const { + return begin(); + } iterator end() { - return store_.mutable_data() + store_.size(); + return store_.mutableData() + store_.size(); } const_iterator end() const { @@ -1174,18 +1309,18 @@ public: // C++11 21.4.5, element access: const value_type& front() const { return *begin(); } const value_type& back() const { - assert(!empty()); + FBSTRING_ASSERT(!empty()); // Should be begin()[size() - 1], but that branches twice return *(end() - 1); } value_type& front() { return *begin(); } value_type& back() { - assert(!empty()); + FBSTRING_ASSERT(!empty()); // Should be begin()[size() - 1], but that branches twice return *(end() - 1); } void pop_back() { - assert(!empty()); + FBSTRING_ASSERT(!empty()); store_.shrink(1); } @@ -1198,33 +1333,7 @@ public: return std::numeric_limits::max(); } - void resize(const size_type n, const value_type c = value_type()) { - auto size = this->size(); - if (n <= size) { - store_.shrink(size - n); - } else { - // Do this in two steps to minimize slack memory copied (see - // smartRealloc). - auto const capacity = this->capacity(); - assert(capacity >= size); - if (size < capacity) { - auto delta = std::min(n, capacity) - size; - store_.expand_noinit(delta); - fbstring_detail::pod_fill(begin() + size, end(), c); - size += delta; - if (size == n) { - store_.writeTerminator(); - return; - } - assert(size < n); - } - auto const delta = n - size; - store_.expand_noinit(delta); - fbstring_detail::pod_fill(end() - delta, end(), c); - store_.writeTerminator(); - } - assert(this->size() == n); - } + void resize(size_type n, value_type c = value_type()); size_type capacity() const { return store_.capacity(); } @@ -1247,14 +1356,10 @@ public: // C++11 21.4.5 element access: const_reference operator[](size_type pos) const { - return *(c_str() + pos); + return *(begin() + pos); } reference operator[](size_type pos) { - if (pos == size()) { - // Just call c_str() to make sure '\0' is present - c_str(); - } return *(begin() + pos); } @@ -1287,73 +1392,23 @@ public: return *this; } - basic_fbstring& append(const basic_fbstring& str) { -#ifndef NDEBUG - auto desiredSize = size() + str.size(); -#endif - append(str.data(), str.size()); - assert(size() == desiredSize); - return *this; + basic_fbstring& append(const basic_fbstring& str); + + basic_fbstring& + append(const basic_fbstring& str, const size_type pos, size_type n); + + basic_fbstring& append(const value_type* s, size_type n); + + basic_fbstring& append(const value_type* s) { + return append(s, traitsLength(s)); } - basic_fbstring& append(const basic_fbstring& str, const size_type pos, - size_type n) { - const size_type sz = str.size(); - enforce(pos <= sz, std::__throw_out_of_range, ""); - procrustes(n, sz - pos); - return append(str.data() + pos, n); - } - - basic_fbstring& append(const value_type* s, size_type n) { -#ifndef NDEBUG - Invariant checker(*this); - (void) checker; -#endif - if (FBSTRING_UNLIKELY(!n)) { - // Unlikely but must be done - return *this; - } - auto const oldSize = size(); - auto const oldData = data(); - // Check for aliasing (rare). We could use "<=" here but in theory - // those do not work for pointers unless the pointers point to - // elements in the same array. For that reason we use - // std::less_equal, which is guaranteed to offer a total order - // over pointers. See discussion at http://goo.gl/Cy2ya for more - // info. - std::less_equal le; - if (FBSTRING_UNLIKELY(le(oldData, s) && !le(oldData + oldSize, s))) { - assert(le(s + n, oldData + oldSize)); - const size_type offset = s - oldData; - store_.reserve(oldSize + n); - // Restore the source - s = data() + offset; - } - // Warning! Repeated appends with short strings may actually incur - // practically quadratic performance. Avoid that by pushing back - // the first character (which ensures exponential growth) and then - // appending the rest normally. Worst case the append may incur a - // second allocation but that will be rare. - push_back(*s++); - --n; - memcpy(store_.expand_noinit(n), s, n * sizeof(value_type)); - assert(size() == oldSize + n + 1); - return *this; - } - - basic_fbstring& append(const value_type* s) { - return append(s, traits_type::length(s)); - } - - basic_fbstring& append(size_type n, value_type c) { - resize(size() + n, c); - return *this; - } - - template - basic_fbstring& append(InputIterator first, InputIterator last) { - insert(end(), first, last); - return *this; + basic_fbstring& append(size_type n, value_type c); + + template + basic_fbstring& append(InputIterator first, InputIterator last) { + insert(end(), first, last); + return *this; } basic_fbstring& append(std::initializer_list il) { @@ -1365,7 +1420,9 @@ public: } basic_fbstring& assign(const basic_fbstring& str) { - if (&str == this) return *this; + if (&str == this) { + return *this; + } return assign(str.data(), str.size()); } @@ -1373,34 +1430,13 @@ public: return *this = std::move(str); } - basic_fbstring& assign(const basic_fbstring& str, const size_type pos, - size_type n) { - const size_type sz = str.size(); - enforce(pos <= sz, std::__throw_out_of_range, ""); - procrustes(n, sz - pos); - return assign(str.data() + pos, n); - } + basic_fbstring& + assign(const basic_fbstring& str, const size_type pos, size_type n); - basic_fbstring& assign(const value_type* s, const size_type n) { - Invariant checker(*this); - (void) checker; - if (size() >= n) { - std::copy(s, s + n, begin()); - resize(n); - assert(size() == n); - } else { - const value_type *const s2 = s + size(); - std::copy(s, s2, begin()); - append(s2, n - size()); - assert(size() == n); - } - store_.writeTerminator(); - assert(size() == n); - return *this; - } + basic_fbstring& assign(const value_type* s, const size_type n); basic_fbstring& assign(const value_type* s) { - return assign(s, traits_type::length(s)); + return assign(s, traitsLength(s)); } basic_fbstring& assign(std::initializer_list il) { @@ -1430,7 +1466,7 @@ public: } basic_fbstring& insert(size_type pos, const value_type* s) { - return insert(pos, s, traits_type::length(s)); + return insert(pos, s, traitsLength(s)); } basic_fbstring& insert(size_type pos, size_type n, value_type c) { @@ -1440,111 +1476,57 @@ public: } iterator insert(const_iterator p, const value_type c) { - const size_type pos = p - begin(); + const size_type pos = p - cbegin(); insert(p, 1, c); return begin() + pos; } -private: - template class Selector {}; +#ifndef _LIBSTDCXX_FBSTRING + private: + typedef std::basic_istream istream_type; + istream_type& getlineImpl(istream_type& is, value_type delim); - iterator insertImplDiscr(const_iterator p, - size_type n, value_type c, Selector<1>) { - Invariant checker(*this); - (void) checker; - auto const pos = p - begin(); - assert(p >= begin() && p <= end()); - if (capacity() - size() < n) { - const size_type sz = p - begin(); - reserve(size() + n); - p = begin() + sz; - } - const iterator oldEnd = end(); - if (n < size_type(oldEnd - p)) { - append(oldEnd - n, oldEnd); - //std::copy( - // reverse_iterator(oldEnd - n), - // reverse_iterator(p), - // reverse_iterator(oldEnd)); - fbstring_detail::pod_move(&*p, &*oldEnd - n, - begin() + pos + n); - std::fill(begin() + pos, begin() + pos + n, c); - } else { - append(n - (end() - p), c); - append(iterator(p), oldEnd); - std::fill(iterator(p), oldEnd, c); - } - store_.writeTerminator(); - return begin() + pos; + public: + friend inline istream_type& getline(istream_type& is, + basic_fbstring& str, + value_type delim) { + return str.getlineImpl(is, delim); } - template - iterator insertImplDiscr(const_iterator i, - InputIter b, InputIter e, Selector<0>) { - return insertImpl(i, b, e, - typename std::iterator_traits::iterator_category()); + friend inline istream_type& getline(istream_type& is, basic_fbstring& str) { + return getline(is, str, '\n'); } +#endif + + private: + iterator + insertImplDiscr(const_iterator i, size_type n, value_type c, std::true_type); + + template + iterator + insertImplDiscr(const_iterator i, InputIter b, InputIter e, std::false_type); template - iterator insertImpl(const_iterator i, - FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { - Invariant checker(*this); - (void) checker; - const size_type pos = i - begin(); - const typename std::iterator_traits::difference_type n2 = - std::distance(s1, s2); - assert(n2 >= 0); - using namespace fbstring_detail; - assert(pos <= size()); - - const typename std::iterator_traits::difference_type maxn2 = - capacity() - size(); - if (maxn2 < n2) { - // realloc the string - reserve(size() + n2); - i = begin() + pos; - } - if (pos + n2 <= size()) { - const iterator tailBegin = end() - n2; - store_.expand_noinit(n2); - fbstring_detail::pod_copy(tailBegin, tailBegin + n2, end() - n2); - std::copy(const_reverse_iterator(tailBegin), const_reverse_iterator(i), - reverse_iterator(tailBegin + n2)); - std::copy(s1, s2, begin() + pos); - } else { - FwdIterator t = s1; - const size_type old_size = size(); - std::advance(t, old_size - pos); - const size_t newElems = std::distance(t, s2); - store_.expand_noinit(n2); - std::copy(t, s2, begin() + old_size); - fbstring_detail::pod_copy(data() + pos, data() + old_size, - begin() + old_size + newElems); - std::copy(s1, t, begin() + pos); - } - store_.writeTerminator(); - return begin() + pos; - } + iterator insertImpl( + const_iterator i, + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag); template - iterator insertImpl(const_iterator i, - InputIterator b, InputIterator e, - std::input_iterator_tag) { - const auto pos = i - begin(); - basic_fbstring temp(begin(), i); - for (; b != e; ++b) { - temp.push_back(*b); - } - temp.append(i, cend()); - swap(temp); - return begin() + pos; - } + iterator insertImpl( + const_iterator i, + InputIterator b, + InputIterator e, + std::input_iterator_tag); -public: + public: template iterator insert(const_iterator p, ItOrLength first_or_n, ItOrChar last_or_c) { - Selector::is_specialized> sel; - return insertImplDiscr(p, first_or_n, last_or_c, sel); + using Sel = std::integral_constant< + bool, + std::numeric_limits::is_specialized>; + return insertImplDiscr(p, first_or_n, last_or_c, Sel()); } iterator insert(const_iterator p, std::initializer_list il) { @@ -1553,7 +1535,7 @@ public: basic_fbstring& erase(size_type pos = 0, size_type n = npos) { Invariant checker(*this); - (void) checker; + enforce(pos <= length(), std::__throw_out_of_range, ""); procrustes(n, length() - pos); std::copy(begin() + pos + n, end(), begin() + pos); @@ -1593,7 +1575,7 @@ public: // Replaces at most n1 chars of *this, starting with pos, with chars from s basic_fbstring& replace(size_type pos, size_type n1, const value_type* s) { - return replace(pos, n1, s, traits_type::length(s)); + return replace(pos, n1, s, traitsLength(s)); } // Replaces at most n1 chars of *this, starting with pos, with n2 @@ -1607,7 +1589,7 @@ public: basic_fbstring& replace(size_type pos, size_type n1, StrOrLength s_or_n2, NumOrChar n_or_c) { Invariant checker(*this); - (void) checker; + enforce(pos <= size(), std::__throw_out_of_range, ""); procrustes(n1, length() - pos); const iterator b = begin() + pos; @@ -1619,126 +1601,85 @@ public: } basic_fbstring& replace(iterator i1, iterator i2, const value_type* s) { - return replace(i1, i2, s, traits_type::length(s)); + return replace(i1, i2, s, traitsLength(s)); } -private: - basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, - const value_type* s, size_type n, - Selector<2>) { - assert(i1 <= i2); - assert(begin() <= i1 && i1 <= end()); - assert(begin() <= i2 && i2 <= end()); - return replace(i1, i2, s, s + n); - } + private: + basic_fbstring& replaceImplDiscr( + iterator i1, + iterator i2, + const value_type* s, + size_type n, + std::integral_constant); - basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, - size_type n2, value_type c, Selector<1>) { - const size_type n1 = i2 - i1; - if (n1 > n2) { - std::fill(i1, i1 + n2, c); - erase(i1 + n2, i2); - } else { - std::fill(i1, i2, c); - insert(i2, n2 - n1, c); - } - assert(isSane()); - return *this; - } + basic_fbstring& replaceImplDiscr( + iterator i1, + iterator i2, + size_type n2, + value_type c, + std::integral_constant); template - basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, - InputIter b, InputIter e, - Selector<0>) { - replaceImpl(i1, i2, b, e, - typename std::iterator_traits::iterator_category()); - return *this; - } - -private: + basic_fbstring& replaceImplDiscr( + iterator i1, + iterator i2, + InputIter b, + InputIter e, + std::integral_constant); + + private: template - bool replaceAliased(iterator i1, iterator i2, - FwdIterator s1, FwdIterator s2, std::false_type) { + bool replaceAliased( + iterator /* i1 */, + iterator /* i2 */, + FwdIterator /* s1 */, + FwdIterator /* s2 */, + std::false_type) { return false; } template - bool replaceAliased(iterator i1, iterator i2, - FwdIterator s1, FwdIterator s2, std::true_type) { - static const std::less_equal le = - std::less_equal(); - const bool aliased = le(&*begin(), &*s1) && le(&*s1, &*end()); - if (!aliased) { - return false; - } - // Aliased replace, copy to new string - basic_fbstring temp; - temp.reserve(size() - (i2 - i1) + std::distance(s1, s2)); - temp.append(begin(), i1).append(s1, s2).append(i2, end()); - swap(temp); - return true; - } + bool replaceAliased( + iterator i1, + iterator i2, + FwdIterator s1, + FwdIterator s2, + std::true_type); template - void replaceImpl(iterator i1, iterator i2, - FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { - Invariant checker(*this); - (void) checker; - - // Handle aliased replace - if (replaceAliased(i1, i2, s1, s2, - std::integral_constant::value || - std::is_same::value>())) { - return; - } - - auto const n1 = i2 - i1; - assert(n1 >= 0); - auto const n2 = std::distance(s1, s2); - assert(n2 >= 0); - - if (n1 > n2) { - // shrinks - std::copy(s1, s2, i1); - erase(i1 + n2, i2); - } else { - // grows - fbstring_detail::copy_n(s1, n1, i1); - std::advance(s1, n1); - insert(i2, s1, s2); - } - assert(isSane()); - } + void replaceImpl( + iterator i1, + iterator i2, + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag); template - void replaceImpl(iterator i1, iterator i2, - InputIterator b, InputIterator e, std::input_iterator_tag) { - basic_fbstring temp(begin(), i1); - temp.append(b, e).append(i2, end()); - swap(temp); - } - -public: + void replaceImpl( + iterator i1, + iterator i2, + InputIterator b, + InputIterator e, + std::input_iterator_tag); + + public: template basic_fbstring& replace(iterator i1, iterator i2, T1 first_or_n_or_s, T2 last_or_c_or_n) { - const bool - num1 = std::numeric_limits::is_specialized, - num2 = std::numeric_limits::is_specialized; - return replaceImplDiscr( - i1, i2, first_or_n_or_s, last_or_c_or_n, - Selector()); + constexpr bool num1 = std::numeric_limits::is_specialized, + num2 = std::numeric_limits::is_specialized; + using Sel = + std::integral_constant; + return replaceImplDiscr(i1, i2, first_or_n_or_s, last_or_c_or_n, Sel()); } size_type copy(value_type* s, size_type n, size_type pos = 0) const { enforce(pos <= size(), std::__throw_out_of_range, ""); procrustes(n, size() - pos); - fbstring_detail::pod_copy( - data() + pos, - data() + pos + n, - s); + if (n != 0) { + fbstring_detail::podCopy(data() + pos, data() + pos + n, s); + } return n; } @@ -1760,63 +1701,11 @@ public: return find(str.data(), pos, str.length()); } - size_type find(const value_type* needle, const size_type pos, - const size_type nsize) const { - if (!nsize) return pos; - auto const size = this->size(); - // nsize + pos can overflow (eg pos == npos), guard against that by checking - // that nsize + pos does not wrap around. - if (nsize + pos > size || nsize + pos < pos) return npos; - // Don't use std::search, use a Boyer-Moore-like trick by comparing - // the last characters first - auto const haystack = data(); - auto const nsize_1 = nsize - 1; - auto const lastNeedle = needle[nsize_1]; - - // Boyer-Moore skip value for the last char in the needle. Zero is - // not a valid value; skip will be computed the first time it's - // needed. - size_type skip = 0; - - const E * i = haystack + pos; - auto iEnd = haystack + size - nsize_1; - - while (i < iEnd) { - // Boyer-Moore: match the last element in the needle - while (i[nsize_1] != lastNeedle) { - if (++i == iEnd) { - // not found - return npos; - } - } - // Here we know that the last char matches - // Continue in pedestrian mode - for (size_t j = 0; ; ) { - assert(j < nsize); - if (i[j] != needle[j]) { - // Not found, we can skip - // Compute the skip value lazily - if (skip == 0) { - skip = 1; - while (skip <= nsize_1 && needle[nsize_1 - skip] != lastNeedle) { - ++skip; - } - } - i += skip; - break; - } - // Check if done searching - if (++j == nsize) { - // Yay - return i - haystack; - } - } - } - return npos; - } + size_type find(const value_type* needle, size_type pos, size_type nsize) + const; size_type find(const value_type* s, size_type pos = 0) const { - return find(s, pos, traits_type::length(s)); + return find(s, pos, traitsLength(s)); } size_type find (value_type c, size_type pos = 0) const { @@ -1827,24 +1716,10 @@ public: return rfind(str.data(), pos, str.length()); } - size_type rfind(const value_type* s, size_type pos, size_type n) const { - if (n > length()) return npos; - pos = std::min(pos, length() - n); - if (n == 0) return pos; - - const_iterator i(begin() + pos); - for (; ; --i) { - if (traits_type::eq(*i, *s) - && traits_type::compare(&*i, s, n) == 0) { - return i - begin(); - } - if (i == begin()) break; - } - return npos; - } + size_type rfind(const value_type* s, size_type pos, size_type n) const; size_type rfind(const value_type* s, size_type pos = npos) const { - return rfind(s, pos, traits_type::length(s)); + return rfind(s, pos, traitsLength(s)); } size_type rfind(value_type c, size_type pos = npos) const { @@ -1855,50 +1730,27 @@ public: return find_first_of(str.data(), pos, str.length()); } - size_type find_first_of(const value_type* s, - size_type pos, size_type n) const { - if (pos > length() || n == 0) return npos; - const_iterator i(begin() + pos), - finish(end()); - for (; i != finish; ++i) { - if (traits_type::find(s, n, *i) != 0) { - return i - begin(); - } - } - return npos; - } + size_type find_first_of(const value_type* s, size_type pos, size_type n) + const; size_type find_first_of(const value_type* s, size_type pos = 0) const { - return find_first_of(s, pos, traits_type::length(s)); + return find_first_of(s, pos, traitsLength(s)); } size_type find_first_of(value_type c, size_type pos = 0) const { return find_first_of(&c, pos, 1); } - size_type find_last_of (const basic_fbstring& str, - size_type pos = npos) const { + size_type find_last_of(const basic_fbstring& str, size_type pos = npos) + const { return find_last_of(str.data(), pos, str.length()); } - size_type find_last_of (const value_type* s, size_type pos, - size_type n) const { - if (!empty() && n > 0) { - pos = std::min(pos, length() - 1); - const_iterator i(begin() + pos); - for (;; --i) { - if (traits_type::find(s, n, *i) != 0) { - return i - begin(); - } - if (i == begin()) break; - } - } - return npos; - } + size_type find_last_of(const value_type* s, size_type pos, size_type n) const; size_type find_last_of (const value_type* s, size_type pos = npos) const { - return find_last_of(s, pos, traits_type::length(s)); + return find_last_of(s, pos, traitsLength(s)); } size_type find_last_of (value_type c, size_type pos = npos) const { @@ -1910,24 +1762,12 @@ public: return find_first_not_of(str.data(), pos, str.size()); } - size_type find_first_not_of(const value_type* s, size_type pos, - size_type n) const { - if (pos < length()) { - const_iterator - i(begin() + pos), - finish(end()); - for (; i != finish; ++i) { - if (traits_type::find(s, n, *i) == 0) { - return i - begin(); - } - } - } - return npos; - } + size_type find_first_not_of(const value_type* s, size_type pos, size_type n) + const; size_type find_first_not_of(const value_type* s, size_type pos = 0) const { - return find_first_not_of(s, pos, traits_type::length(s)); + return find_first_not_of(s, pos, traitsLength(s)); } size_type find_first_not_of(value_type c, size_type pos = 0) const { @@ -1939,35 +1779,32 @@ public: return find_last_not_of(str.data(), pos, str.length()); } - size_type find_last_not_of(const value_type* s, size_type pos, - size_type n) const { - if (!this->empty()) { - pos = std::min(pos, size() - 1); - const_iterator i(begin() + pos); - for (;; --i) { - if (traits_type::find(s, n, *i) == 0) { - return i - begin(); - } - if (i == begin()) break; - } - } - return npos; - } + size_type find_last_not_of(const value_type* s, size_type pos, size_type n) + const; size_type find_last_not_of(const value_type* s, size_type pos = npos) const { - return find_last_not_of(s, pos, traits_type::length(s)); + return find_last_not_of(s, pos, traitsLength(s)); } size_type find_last_not_of (value_type c, size_type pos = npos) const { return find_last_not_of(&c, pos, 1); } - basic_fbstring substr(size_type pos = 0, size_type n = npos) const { + basic_fbstring substr(size_type pos = 0, size_type n = npos) const& { enforce(pos <= size(), std::__throw_out_of_range, ""); return basic_fbstring(data() + pos, std::min(n, size() - pos)); } + basic_fbstring substr(size_type pos = 0, size_type n = npos) && { + enforce(pos <= size(), std::__throw_out_of_range, ""); + erase(0, pos); + if (n < size()) { + resize(n); + } + return std::move(*this); + } + int compare(const basic_fbstring& str) const { // FIX due to Goncalo N M de Carvalho July 18, 2005 return compare(0, size(), str); @@ -1980,7 +1817,7 @@ public: int compare(size_type pos1, size_type n1, const value_type* s) const { - return compare(pos1, n1, s, traits_type::length(s)); + return compare(pos1, n1, s, traitsLength(s)); } int compare(size_type pos1, size_type n1, @@ -2002,20 +1839,562 @@ public: // Code from Jean-Francois Bastien (03/26/2007) int compare(const value_type* s) const { - // Could forward to compare(0, size(), s, traits_type::length(s)) + // Could forward to compare(0, size(), s, traitsLength(s)) // but that does two extra checks - const size_type n1(size()), n2(traits_type::length(s)); + const size_type n1(size()), n2(traitsLength(s)); const int r = traits_type::compare(data(), s, std::min(n1, n2)); return r != 0 ? r : n1 > n2 ? 1 : n1 < n2 ? -1 : 0; } -private: + private: // Data Storage store_; }; +template +FOLLY_MALLOC_NOINLINE inline typename basic_fbstring::size_type +basic_fbstring::traitsLength(const value_type* s) { + return s ? traits_type::length(s) + : (std::__throw_logic_error( + "basic_fbstring: null pointer initializer not valid"), + 0); +} + +template +inline basic_fbstring& basic_fbstring::operator=( + const basic_fbstring& lhs) { + Invariant checker(*this); + + if (FBSTRING_UNLIKELY(&lhs == this)) { + return *this; + } + + return assign(lhs.data(), lhs.size()); +} + +// Move assignment +template +inline basic_fbstring& basic_fbstring::operator=( + basic_fbstring&& goner) noexcept { + if (FBSTRING_UNLIKELY(&goner == this)) { + // Compatibility with std::basic_string<>, + // C++11 21.4.2 [string.cons] / 23 requires self-move-assignment support. + return *this; + } + // No need of this anymore + this->~basic_fbstring(); + // Move the goner into this + new (&store_) S(std::move(goner.store_)); + return *this; +} + +template +template +inline typename std::enable_if< + std::is_same< + typename std::decay::type, + typename basic_fbstring::value_type>::value, + basic_fbstring&>::type +basic_fbstring::operator=(TP c) { + Invariant checker(*this); + + if (empty()) { + store_.expandNoinit(1); + } else if (store_.isShared()) { + basic_fbstring(1, c).swap(*this); + return *this; + } else { + store_.shrink(size() - 1); + } + front() = c; + return *this; +} + +template +inline void basic_fbstring::resize( + const size_type n, const value_type c /*= value_type()*/) { + Invariant checker(*this); + + auto size = this->size(); + if (n <= size) { + store_.shrink(size - n); + } else { + auto const delta = n - size; + auto pData = store_.expandNoinit(delta); + fbstring_detail::podFill(pData, pData + delta, c); + } + FBSTRING_ASSERT(this->size() == n); +} + +template +inline basic_fbstring& basic_fbstring::append( + const basic_fbstring& str) { +#ifndef NDEBUG + auto desiredSize = size() + str.size(); +#endif + append(str.data(), str.size()); + FBSTRING_ASSERT(size() == desiredSize); + return *this; +} + +template +inline basic_fbstring& basic_fbstring::append( + const basic_fbstring& str, const size_type pos, size_type n) { + const size_type sz = str.size(); + enforce(pos <= sz, std::__throw_out_of_range, ""); + procrustes(n, sz - pos); + return append(str.data() + pos, n); +} + +template +FOLLY_MALLOC_NOINLINE inline basic_fbstring& +basic_fbstring::append(const value_type* s, size_type n) { + Invariant checker(*this); + + if (FBSTRING_UNLIKELY(!n)) { + // Unlikely but must be done + return *this; + } + auto const oldSize = size(); + auto const oldData = data(); + auto pData = store_.expandNoinit(n, /* expGrowth = */ true); + + // Check for aliasing (rare). We could use "<=" here but in theory + // those do not work for pointers unless the pointers point to + // elements in the same array. For that reason we use + // std::less_equal, which is guaranteed to offer a total order + // over pointers. See discussion at http://goo.gl/Cy2ya for more + // info. + std::less_equal le; + if (FBSTRING_UNLIKELY(le(oldData, s) && !le(oldData + oldSize, s))) { + FBSTRING_ASSERT(le(s + n, oldData + oldSize)); + // expandNoinit() could have moved the storage, restore the source. + s = data() + (s - oldData); + fbstring_detail::podMove(s, s + n, pData); + } else { + fbstring_detail::podCopy(s, s + n, pData); + } + + FBSTRING_ASSERT(size() == oldSize + n); + return *this; +} + +template +inline basic_fbstring& basic_fbstring::append( + size_type n, value_type c) { + Invariant checker(*this); + auto pData = store_.expandNoinit(n, /* expGrowth = */ true); + fbstring_detail::podFill(pData, pData + n, c); + return *this; +} + +template +inline basic_fbstring& basic_fbstring::assign( + const basic_fbstring& str, const size_type pos, size_type n) { + const size_type sz = str.size(); + enforce(pos <= sz, std::__throw_out_of_range, ""); + procrustes(n, sz - pos); + return assign(str.data() + pos, n); +} + +template +FOLLY_MALLOC_NOINLINE inline basic_fbstring& +basic_fbstring::assign(const value_type* s, const size_type n) { + Invariant checker(*this); + + if (n == 0) { + resize(0); + } else if (size() >= n) { + // s can alias this, we need to use podMove. + fbstring_detail::podMove(s, s + n, store_.mutableData()); + store_.shrink(size() - n); + FBSTRING_ASSERT(size() == n); + } else { + // If n is larger than size(), s cannot alias this string's + // storage. + resize(0); + // Do not use exponential growth here: assign() should be tight, + // to mirror the behavior of the equivalent constructor. + fbstring_detail::podCopy(s, s + n, store_.expandNoinit(n)); + } + + FBSTRING_ASSERT(size() == n); + return *this; +} + +#ifndef _LIBSTDCXX_FBSTRING +template +inline typename basic_fbstring::istream_type& +basic_fbstring::getlineImpl(istream_type & is, value_type delim) { + Invariant checker(*this); + + clear(); + size_t size = 0; + while (true) { + size_t avail = capacity() - size; + // fbstring has 1 byte extra capacity for the null terminator, + // and getline null-terminates the read string. + is.getline(store_.expandNoinit(avail), avail + 1, delim); + size += is.gcount(); + + if (is.bad() || is.eof() || !is.fail()) { + // Done by either failure, end of file, or normal read. + if (!is.bad() && !is.eof()) { + --size; // gcount() also accounts for the delimiter. + } + resize(size); + break; + } + + FBSTRING_ASSERT(size == this->size()); + FBSTRING_ASSERT(size == capacity()); + // Start at minimum allocation 63 + terminator = 64. + reserve(std::max(63, 3 * size / 2)); + // Clear the error so we can continue reading. + is.clear(); + } + return is; +} +#endif + +template +inline typename basic_fbstring::size_type +basic_fbstring::find( + const value_type* needle, const size_type pos, const size_type nsize) + const { + auto const size = this->size(); + // nsize + pos can overflow (eg pos == npos), guard against that by checking + // that nsize + pos does not wrap around. + if (nsize + pos > size || nsize + pos < pos) { + return npos; + } + + if (nsize == 0) { + return pos; + } + // Don't use std::search, use a Boyer-Moore-like trick by comparing + // the last characters first + auto const haystack = data(); + auto const nsize_1 = nsize - 1; + auto const lastNeedle = needle[nsize_1]; + + // Boyer-Moore skip value for the last char in the needle. Zero is + // not a valid value; skip will be computed the first time it's + // needed. + size_type skip = 0; + + const E* i = haystack + pos; + auto iEnd = haystack + size - nsize_1; + + while (i < iEnd) { + // Boyer-Moore: match the last element in the needle + while (i[nsize_1] != lastNeedle) { + if (++i == iEnd) { + // not found + return npos; + } + } + // Here we know that the last char matches + // Continue in pedestrian mode + for (size_t j = 0;;) { + FBSTRING_ASSERT(j < nsize); + if (i[j] != needle[j]) { + // Not found, we can skip + // Compute the skip value lazily + if (skip == 0) { + skip = 1; + while (skip <= nsize_1 && needle[nsize_1 - skip] != lastNeedle) { + ++skip; + } + } + i += skip; + break; + } + // Check if done searching + if (++j == nsize) { + // Yay + return i - haystack; + } + } + } + return npos; +} + +template +inline typename basic_fbstring::iterator +basic_fbstring::insertImplDiscr( + const_iterator i, size_type n, value_type c, std::true_type) { + Invariant checker(*this); + + FBSTRING_ASSERT(i >= cbegin() && i <= cend()); + const size_type pos = i - cbegin(); + + auto oldSize = size(); + store_.expandNoinit(n, /* expGrowth = */ true); + auto b = begin(); + fbstring_detail::podMove(b + pos, b + oldSize, b + pos + n); + fbstring_detail::podFill(b + pos, b + pos + n, c); + + return b + pos; +} + +template +template +inline typename basic_fbstring::iterator +basic_fbstring::insertImplDiscr( + const_iterator i, InputIter b, InputIter e, std::false_type) { + return insertImpl( + i, b, e, typename std::iterator_traits::iterator_category()); +} + +template +template +inline typename basic_fbstring::iterator +basic_fbstring::insertImpl( + const_iterator i, + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag) { + Invariant checker(*this); + + FBSTRING_ASSERT(i >= cbegin() && i <= cend()); + const size_type pos = i - cbegin(); + auto n = std::distance(s1, s2); + FBSTRING_ASSERT(n >= 0); + + auto oldSize = size(); + store_.expandNoinit(n, /* expGrowth = */ true); + auto b = begin(); + fbstring_detail::podMove(b + pos, b + oldSize, b + pos + n); + std::copy(s1, s2, b + pos); + + return b + pos; +} + +template +template +inline typename basic_fbstring::iterator +basic_fbstring::insertImpl( + const_iterator i, + InputIterator b, + InputIterator e, + std::input_iterator_tag) { + const auto pos = i - cbegin(); + basic_fbstring temp(cbegin(), i); + for (; b != e; ++b) { + temp.push_back(*b); + } + temp.append(i, cend()); + swap(temp); + return begin() + pos; +} + +template +inline basic_fbstring& basic_fbstring::replaceImplDiscr( + iterator i1, + iterator i2, + const value_type* s, + size_type n, + std::integral_constant) { + FBSTRING_ASSERT(i1 <= i2); + FBSTRING_ASSERT(begin() <= i1 && i1 <= end()); + FBSTRING_ASSERT(begin() <= i2 && i2 <= end()); + return replace(i1, i2, s, s + n); +} + +template +inline basic_fbstring& basic_fbstring::replaceImplDiscr( + iterator i1, + iterator i2, + size_type n2, + value_type c, + std::integral_constant) { + const size_type n1 = i2 - i1; + if (n1 > n2) { + std::fill(i1, i1 + n2, c); + erase(i1 + n2, i2); + } else { + std::fill(i1, i2, c); + insert(i2, n2 - n1, c); + } + FBSTRING_ASSERT(isSane()); + return *this; +} + +template +template +inline basic_fbstring& basic_fbstring::replaceImplDiscr( + iterator i1, + iterator i2, + InputIter b, + InputIter e, + std::integral_constant) { + using Cat = typename std::iterator_traits::iterator_category; + replaceImpl(i1, i2, b, e, Cat()); + return *this; +} + +template +template +inline bool basic_fbstring::replaceAliased( + iterator i1, iterator i2, FwdIterator s1, FwdIterator s2, std::true_type) { + std::less_equal le{}; + const bool aliased = le(&*begin(), &*s1) && le(&*s1, &*end()); + if (!aliased) { + return false; + } + // Aliased replace, copy to new string + basic_fbstring temp; + temp.reserve(size() - (i2 - i1) + std::distance(s1, s2)); + temp.append(begin(), i1).append(s1, s2).append(i2, end()); + swap(temp); + return true; +} + +template +template +inline void basic_fbstring::replaceImpl( + iterator i1, + iterator i2, + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag) { + Invariant checker(*this); + + // Handle aliased replace + using Sel = std::integral_constant< + bool, + std::is_same::value || + std::is_same::value>; + if (replaceAliased(i1, i2, s1, s2, Sel())) { + return; + } + + auto const n1 = i2 - i1; + FBSTRING_ASSERT(n1 >= 0); + auto const n2 = std::distance(s1, s2); + FBSTRING_ASSERT(n2 >= 0); + + if (n1 > n2) { + // shrinks + std::copy(s1, s2, i1); + erase(i1 + n2, i2); + } else { + // grows + s1 = fbstring_detail::copy_n(s1, n1, i1).first; + insert(i2, s1, s2); + } + FBSTRING_ASSERT(isSane()); +} + +template +template +inline void basic_fbstring::replaceImpl( + iterator i1, + iterator i2, + InputIterator b, + InputIterator e, + std::input_iterator_tag) { + basic_fbstring temp(begin(), i1); + temp.append(b, e).append(i2, end()); + swap(temp); +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::rfind( + const value_type* s, size_type pos, size_type n) const { + if (n > length()) { + return npos; + } + pos = std::min(pos, length() - n); + if (n == 0) { + return pos; + } + + const_iterator i(begin() + pos); + for (;; --i) { + if (traits_type::eq(*i, *s) && traits_type::compare(&*i, s, n) == 0) { + return i - begin(); + } + if (i == begin()) { + break; + } + } + return npos; +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::find_first_of( + const value_type* s, size_type pos, size_type n) const { + if (pos > length() || n == 0) { + return npos; + } + const_iterator i(begin() + pos), finish(end()); + for (; i != finish; ++i) { + if (traits_type::find(s, n, *i) != nullptr) { + return i - begin(); + } + } + return npos; +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::find_last_of( + const value_type* s, size_type pos, size_type n) const { + if (!empty() && n > 0) { + pos = std::min(pos, length() - 1); + const_iterator i(begin() + pos); + for (;; --i) { + if (traits_type::find(s, n, *i) != nullptr) { + return i - begin(); + } + if (i == begin()) { + break; + } + } + } + return npos; +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::find_first_not_of( + const value_type* s, size_type pos, size_type n) const { + if (pos < length()) { + const_iterator i(begin() + pos), finish(end()); + for (; i != finish; ++i) { + if (traits_type::find(s, n, *i) == nullptr) { + return i - begin(); + } + } + } + return npos; +} + +template +inline typename basic_fbstring::size_type +basic_fbstring::find_last_not_of( + const value_type* s, size_type pos, size_type n) const { + if (!this->empty()) { + pos = std::min(pos, size() - 1); + const_iterator i(begin() + pos); + for (;; --i) { + if (traits_type::find(s, n, *i) == nullptr) { + return i - begin(); + } + if (i == begin()) { + break; + } + } + } + return npos; +} + // non-member functions -// C++11 21.4.8.1/2 +// C++11 21.4.8.1/1 template inline basic_fbstring operator+(const basic_fbstring& lhs, @@ -2057,24 +2436,45 @@ basic_fbstring operator+(basic_fbstring&& lhs, return std::move(lhs.append(rhs)); } +// C++11 21.4.8.1/5 template inline basic_fbstring operator+( - const typename basic_fbstring::value_type* lhs, + const E* lhs, const basic_fbstring& rhs) { // basic_fbstring result; - const typename basic_fbstring::size_type len = - basic_fbstring::traits_type::length(lhs); + const auto len = basic_fbstring::traits_type::length(lhs); + result.reserve(len + rhs.size()); + result.append(lhs, len).append(rhs); + return result; +} + +// C++11 21.4.8.1/6 +template +inline +basic_fbstring operator+( + const E* lhs, + basic_fbstring&& rhs) { + // + const auto len = basic_fbstring::traits_type::length(lhs); + if (rhs.capacity() >= len + rhs.size()) { + // Good, at least we don't need to reallocate + rhs.insert(rhs.begin(), lhs, lhs + len); + return rhs; + } + // Meh, no go. Do it by hand since we have len already. + basic_fbstring result; result.reserve(len + rhs.size()); result.append(lhs, len).append(rhs); return result; } +// C++11 21.4.8.1/7 template inline basic_fbstring operator+( - typename basic_fbstring::value_type lhs, + E lhs, const basic_fbstring& rhs) { basic_fbstring result; @@ -2084,11 +2484,29 @@ basic_fbstring operator+( return result; } +// C++11 21.4.8.1/8 +template +inline +basic_fbstring operator+( + E lhs, + basic_fbstring&& rhs) { + // + if (rhs.capacity() > rhs.size()) { + // Good, at least we don't need to reallocate + rhs.insert(rhs.begin(), lhs); + return rhs; + } + // Meh, no go. Forward to operator+(E, const&). + auto const& rhsC = rhs; + return lhs + rhsC; +} + +// C++11 21.4.8.1/9 template inline basic_fbstring operator+( const basic_fbstring& lhs, - const typename basic_fbstring::value_type* rhs) { + const E* rhs) { typedef typename basic_fbstring::size_type size_type; typedef typename basic_fbstring::traits_type traits_type; @@ -2100,11 +2518,22 @@ basic_fbstring operator+( return result; } +// C++11 21.4.8.1/10 +template +inline +basic_fbstring operator+( + basic_fbstring&& lhs, + const E* rhs) { + // + return std::move(lhs += rhs); +} + +// C++11 21.4.8.1/11 template inline basic_fbstring operator+( const basic_fbstring& lhs, - typename basic_fbstring::value_type rhs) { + E rhs) { basic_fbstring result; result.reserve(lhs.size() + 1); @@ -2113,6 +2542,16 @@ basic_fbstring operator+( return result; } +// C++11 21.4.8.1/12 +template +inline +basic_fbstring operator+( + basic_fbstring&& lhs, + E rhs) { + // + return std::move(lhs += rhs); +} + template inline bool operator==(const basic_fbstring& lhs, @@ -2238,32 +2677,34 @@ std::basic_istream< std::basic_istream::value_type, typename basic_fbstring::traits_type>& is, basic_fbstring& str) { - typename std::basic_istream::sentry sentry(is); - typedef std::basic_istream::value_type, - typename basic_fbstring::traits_type> - __istream_type; - typedef typename __istream_type::ios_base __ios_base; + typedef std::basic_istream< + typename basic_fbstring::value_type, + typename basic_fbstring::traits_type> + _istream_type; + typename _istream_type::sentry sentry(is); size_t extracted = 0; - auto err = __ios_base::goodbit; + auto err = _istream_type::goodbit; if (sentry) { auto n = is.width(); - if (n == 0) { + if (n <= 0) { n = str.max_size(); } str.erase(); - auto got = is.rdbuf()->sgetc(); - for (; extracted != n && got != T::eof() && !isspace(got); ++extracted) { - // Whew. We get to store this guy + for (auto got = is.rdbuf()->sgetc(); extracted != size_t(n); ++extracted) { + if (got == T::eof()) { + err |= _istream_type::eofbit; + is.width(0); + break; + } + if (isspace(got)) { + break; + } str.push_back(got); got = is.rdbuf()->snextc(); } - if (got == T::eof()) { - err |= __ios_base::eofbit; - is.width(0); - } } if (!extracted) { - err |= __ios_base::failbit; + err |= _istream_type::failbit; } if (err) { is.setstate(err); @@ -2280,118 +2721,128 @@ operator<<( typename basic_fbstring::traits_type>& os, const basic_fbstring& str) { #if _LIBCPP_VERSION - typename std::basic_ostream< - typename basic_fbstring::value_type, - typename basic_fbstring::traits_type>::sentry __s(os); - if (__s) { + typedef std::basic_ostream< + typename basic_fbstring::value_type, + typename basic_fbstring::traits_type> + _ostream_type; + typename _ostream_type::sentry _s(os); + if (_s) { typedef std::ostreambuf_iterator< typename basic_fbstring::value_type, typename basic_fbstring::traits_type> _Ip; size_t __len = str.size(); bool __left = - (os.flags() & std::ios_base::adjustfield) == std::ios_base::left; + (os.flags() & _ostream_type::adjustfield) == _ostream_type::left; if (__pad_and_output(_Ip(os), str.data(), __left ? str.data() + __len : str.data(), str.data() + __len, os, os.fill()).failed()) { - os.setstate(std::ios_base::badbit | std::ios_base::failbit); + os.setstate(_ostream_type::badbit | _ostream_type::failbit); } } +#elif defined(_MSC_VER) + typedef decltype(os.precision()) streamsize; + // MSVC doesn't define __ostream_insert + os.write(str.data(), static_cast(str.size())); #else std::__ostream_insert(os, str.data(), str.size()); #endif return os; } -#ifndef _LIBSTDCXX_FBSTRING - -template -inline -std::basic_istream::value_type, - typename basic_fbstring::traits_type>& -getline( - std::basic_istream::value_type, - typename basic_fbstring::traits_type>& is, - basic_fbstring& str, - typename basic_fbstring::value_type delim) { - // Use the nonstandard getdelim() - char * buf = nullptr; - size_t size = 0; - for (;;) { - // This looks quadratic but it really depends on realloc - auto const newSize = size + 128; - buf = static_cast(checkedRealloc(buf, newSize)); - is.getline(buf + size, newSize - size, delim); - if (is.bad() || is.eof() || !is.fail()) { - // done by either failure, end of file, or normal read - size += std::strlen(buf + size); - break; - } - // Here we have failed due to too short a buffer - // Minus one to discount the terminating '\0' - size = newSize - 1; - assert(buf[size] == 0); - // Clear the error so we can continue reading - is.clear(); - } - basic_fbstring result(buf, size, size + 1, - AcquireMallocatedString()); - result.swap(str); - return is; -} - -template -inline -std::basic_istream::value_type, - typename basic_fbstring::traits_type>& -getline( - std::basic_istream::value_type, - typename basic_fbstring::traits_type>& is, - basic_fbstring& str) { - // Just forward to the version with a delimiter - return getline(is, str, '\n'); -} - -#endif - template -const typename basic_fbstring::size_type -basic_fbstring::npos = - static_cast::size_type>(-1); +constexpr typename basic_fbstring::size_type + basic_fbstring::npos; #ifndef _LIBSTDCXX_FBSTRING // basic_string compatibility routines -template -inline -bool operator==(const basic_fbstring& lhs, - const std::string& rhs) { +template +inline bool operator==( + const basic_fbstring& lhs, + const std::basic_string& rhs) { return lhs.compare(0, lhs.size(), rhs.data(), rhs.size()) == 0; } -template -inline -bool operator==(const std::string& lhs, - const basic_fbstring& rhs) { +template +inline bool operator==( + const std::basic_string& lhs, + const basic_fbstring& rhs) { return rhs == lhs; } -template -inline -bool operator!=(const basic_fbstring& lhs, - const std::string& rhs) { +template +inline bool operator!=( + const basic_fbstring& lhs, + const std::basic_string& rhs) { return !(lhs == rhs); } -template -inline -bool operator!=(const std::string& lhs, - const basic_fbstring& rhs) { +template +inline bool operator!=( + const std::basic_string& lhs, + const basic_fbstring& rhs) { return !(lhs == rhs); } +template +inline bool operator<( + const basic_fbstring& lhs, + const std::basic_string& rhs) { + return lhs.compare(0, lhs.size(), rhs.data(), rhs.size()) < 0; +} + +template +inline bool operator>( + const basic_fbstring& lhs, + const std::basic_string& rhs) { + return lhs.compare(0, lhs.size(), rhs.data(), rhs.size()) > 0; +} + +template +inline bool operator<( + const std::basic_string& lhs, + const basic_fbstring& rhs) { + return rhs > lhs; +} + +template +inline bool operator>( + const std::basic_string& lhs, + const basic_fbstring& rhs) { + return rhs < lhs; +} + +template +inline bool operator<=( + const basic_fbstring& lhs, + const std::basic_string& rhs) { + return !(lhs > rhs); +} + +template +inline bool operator>=( + const basic_fbstring& lhs, + const std::basic_string& rhs) { + return !(lhs < rhs); +} + +template +inline bool operator<=( + const std::basic_string& lhs, + const basic_fbstring& rhs) { + return !(lhs > rhs); +} + +template +inline bool operator>=( + const std::basic_string& lhs, + const basic_fbstring& rhs) { + return !(lhs < rhs); +} + #if !defined(_LIBSTDCXX_FBSTRING) typedef basic_fbstring fbstring; #endif @@ -2400,11 +2851,9 @@ typedef basic_fbstring fbstring; template FOLLY_ASSUME_RELOCATABLE(basic_fbstring); -#else -_GLIBCXX_END_NAMESPACE_VERSION #endif -} // namespace folly +FOLLY_FBSTRING_END_NAMESPACE #ifndef _LIBSTDCXX_FBSTRING @@ -2412,53 +2861,48 @@ _GLIBCXX_END_NAMESPACE_VERSION // // Handle interaction with different C++ standard libraries, which // expect these types to be in different namespaces. -namespace std { -template -struct hash > : private hash { - size_t operator()(const folly::basic_fbstring & s) const { - return hash::operator()(s.c_str()); - } -}; +#define FOLLY_FBSTRING_HASH1(T) \ + template <> \ + struct hash< ::folly::basic_fbstring> { \ + size_t operator()(const ::folly::basic_fbstring& s) const { \ + return ::folly::hash::fnv32_buf(s.data(), s.size() * sizeof(T)); \ + } \ + }; -template <> -struct hash< ::folly::fbstring> { - size_t operator()(const ::folly::fbstring& s) const { - return ::folly::hash::fnv32_buf(s.data(), s.size()); - } -}; +// The C++11 standard says that these four are defined +#define FOLLY_FBSTRING_HASH \ + FOLLY_FBSTRING_HASH1(char) \ + FOLLY_FBSTRING_HASH1(char16_t) \ + FOLLY_FBSTRING_HASH1(char32_t) \ + FOLLY_FBSTRING_HASH1(wchar_t) -} +namespace std { -#if FOLLY_HAVE_DEPRECATED_ASSOC -#if defined(_GLIBCXX_SYMVER) && !defined(__BIONIC__) -namespace __gnu_cxx { +FOLLY_FBSTRING_HASH -template -struct hash > : private hash { - size_t operator()(const folly::basic_fbstring & s) const { - return hash::operator()(s.c_str()); - } -}; - -template <> -struct hash< ::folly::fbstring> { - size_t operator()(const ::folly::fbstring& s) const { - return ::folly::hash::fnv32_buf(s.data(), s.size()); - } -}; +} // namespace std -} -#endif // _GLIBCXX_SYMVER && !__BIONIC__ -#endif // FOLLY_HAVE_DEPRECATED_ASSOC +#undef FOLLY_FBSTRING_HASH +#undef FOLLY_FBSTRING_HASH1 #endif // _LIBSTDCXX_FBSTRING -#pragma GCC diagnostic pop +FOLLY_POP_WARNING -#undef FBSTRING_DISABLE_ADDRESS_SANITIZER +#undef FBSTRING_DISABLE_SSO +#undef FBSTRING_SANITIZE_ADDRESS #undef throw #undef FBSTRING_LIKELY #undef FBSTRING_UNLIKELY +#undef FBSTRING_ASSERT -#endif // FOLLY_BASE_FBSTRING_H_ +#ifndef _LIBSTDCXX_FBSTRING +namespace folly { +template +struct IsSomeString; + +template <> +struct IsSomeString : std::true_type {}; +} // namespace folly +#endif