X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FFBString.h;h=b3ec8f43eff94d90987e1fefd3dc4e810dca4fd4;hb=8cf0c3e0f10e74e89f0b868e4168ec019ed9b998;hp=424431f9135edf39623b62c26043cf2a040115e6;hpb=d6a1e277540b68ac69c242603c575564f388074c;p=folly.git diff --git a/folly/FBString.h b/folly/FBString.h index 424431f9..b3ec8f43 100644 --- a/folly/FBString.h +++ b/folly/FBString.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Facebook, Inc. + * Copyright 2016 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,88 +17,54 @@ // @author: Andrei Alexandrescu (aalexandre) // String type. -#ifndef FOLLY_BASE_FBSTRING_H_ -#define FOLLY_BASE_FBSTRING_H_ - -/** - fbstring's behavior can be configured via two macro definitions, as - follows. Normally, fbstring does not write a '\0' at the end of - each string whenever it changes the underlying characters. Instead, - it lazily writes the '\0' whenever either c_str() or data() - called. - - This is standard-compliant behavior and may save costs in some - circumstances. However, it may be surprising to some client code - because c_str() and data() are const member functions (fbstring - uses the "mutable" storage class for its own state). - - In order to appease client code that expects fbstring to be - zero-terminated at all times, if the preprocessor symbol - FBSTRING_CONSERVATIVE is defined, fbstring does exactly that, - i.e. it goes the extra mile to guarantee a '\0' is always planted - at the end of its data. - - On the contrary, if the desire is to debug faulty client code that - unduly assumes the '\0' is present, fbstring plants a '^' (i.e., - emphatically NOT a zero) at the end of each string if - FBSTRING_PERVERSE is defined. (Calling c_str() or data() still - writes the '\0', of course.) - - The preprocessor symbols FBSTRING_PERVERSE and - FBSTRING_CONSERVATIVE cannot be defined simultaneously. 
This is - enforced during preprocessing. -*/ - -//#define FBSTRING_PERVERSE -//#define FBSTRING_CONSERVATIVE - -#ifdef FBSTRING_PERVERSE -#ifdef FBSTRING_CONSERVATIVE -#error Cannot define both FBSTRING_PERVERSE and FBSTRING_CONSERVATIVE. -#endif -#endif +#pragma once #include #include #include -#include - -#include "folly/Portability.h" -// libc++ doesn't provide this header, nor does msvc -#ifdef FOLLY_HAVE_BITS_CXXCONFIG_H // This file appears in two locations: inside fbcode and in the // libstdc++ source code (when embedding fbstring as std::string). -// To aid in this schizophrenic use, two macros are defined in -// c++config.h: -// _LIBSTDCXX_FBSTRING - Set inside libstdc++. This is useful to -// gate use inside fbcode v. libstdc++ -#include -#endif - +// To aid in this schizophrenic use, _LIBSTDCXX_FBSTRING is defined in +// libstdc++'s c++config.h, to gate use inside fbcode v. libstdc++. #ifdef _LIBSTDCXX_FBSTRING #pragma GCC system_header +// When used as std::string replacement always disable assertions. +#ifndef NDEBUG +#define NDEBUG +#define FOLLY_DEFINED_NDEBUG_FOR_FBSTRING +#endif // NDEBUG + // Handle the cases where the fbcode version (folly/Malloc.h) is included // either before or after this inclusion. #ifdef FOLLY_MALLOC_H_ #undef FOLLY_MALLOC_H_ -#include "basic_fbstring_malloc.h" +#include "basic_fbstring_malloc.h" // nolint #else -#include "basic_fbstring_malloc.h" +#include "basic_fbstring_malloc.h" // nolint #undef FOLLY_MALLOC_H_ #endif #else // !_LIBSTDCXX_FBSTRING +#include + +// libc++ doesn't provide this header, nor does msvc +#ifdef FOLLY_HAVE_BITS_CXXCONFIG_H +#include +#endif + #include #include #include +#include -#include "folly/Traits.h" -#include "folly/Malloc.h" -#include "folly/Hash.h" +#include +#include +#include +#include #if FOLLY_HAVE_DEPRECATED_ASSOC #ifdef _GLIBCXX_SYMVER @@ -119,12 +85,17 @@ #define FBSTRING_UNLIKELY(x) (x) #endif -// Ignore shadowing warnings within this file, so includers can use -Wshadow. 
#pragma GCC diagnostic push +// Ignore shadowing warnings within this file, so includers can use -Wshadow. #pragma GCC diagnostic ignored "-Wshadow" +// GCC 4.9 has a false positive in setSmallSize (probably +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124), disable +// compile-time array bound checking. +#pragma GCC diagnostic ignored "-Warray-bounds" // FBString cannot use throw when replacing std::string, though it may still // use std::__throw_* +// nolint #define throw FOLLY_FBSTRING_MAY_NOT_USE_THROW #ifdef _LIBSTDCXX_FBSTRING @@ -134,28 +105,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION namespace folly { #endif -// Different versions of gcc/clang support different versions of -// the address sanitizer attribute. Unfortunately, this attribute -// has issues when inlining is used, so disable that as well. #if defined(__clang__) # if __has_feature(address_sanitizer) -# if __has_attribute(__no_address_safety_analysis__) -# define FBSTRING_DISABLE_ADDRESS_SANITIZER \ - __attribute__((__no_address_safety_analysis__, __noinline__)) -# elif __has_attribute(__no_sanitize_address__) -# define FBSTRING_DISABLE_ADDRESS_SANITIZER \ - __attribute__((__no_sanitize_address__, __noinline__)) -# endif +# define FBSTRING_SANITIZE_ADDRESS # endif #elif defined (__GNUC__) && \ - (__GNUC__ == 4) && \ - (__GNUC_MINOR__ >= 8) && \ + (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)) || (__GNUC__ >= 5)) && \ __SANITIZE_ADDRESS__ -# define FBSTRING_DISABLE_ADDRESS_SANITIZER \ - __attribute__((__no_address_safety_analysis__, __noinline__)) +# define FBSTRING_SANITIZE_ADDRESS #endif -#ifndef FBSTRING_DISABLE_ADDRESS_SANITIZER -# define FBSTRING_DISABLE_ADDRESS_SANITIZER + +// When compiling with ASan, always heap-allocate the string even if +// it would fit in-situ, so that ASan can detect access to the string +// buffer after it has been invalidated (destroyed, resized, etc.). 
+// Note that this flag doesn't remove support for in-situ strings, as +// that would break ABI-compatibility and wouldn't allow linking code +// compiled with this flag with code compiled without. +#ifdef FBSTRING_SANITIZE_ADDRESS +# define FBSTRING_DISABLE_SSO true +#else +# define FBSTRING_DISABLE_SSO false #endif namespace fbstring_detail { @@ -266,10 +235,14 @@ public: void shrink(size_t delta); // Expands the string by delta characters (i.e. after this call // size() will report the old size() plus delta) but without - // initializing the expanded region. Returns a pointer to the memory - // to be initialized (the beginning of the expanded portion). The - // caller is expected to fill the expanded area appropriately. - Char* expand_noinit(size_t delta); + // initializing the expanded region. The expanded region is + // zero-terminated. Returns a pointer to the memory to be + // initialized (the beginning of the expanded portion). The caller + // is expected to fill the expanded area appropriately. + // If expGrowth is true, exponential growth is guaranteed. + // It is not guaranteed not to reallocate even if size() + delta < + // capacity(), so all references to the buffer are invalidated. + Char* expand_noinit(size_t delta, bool expGrowth); // Expands the string by one character and sets the last character // to c. void push_back(Char c); @@ -293,19 +266,10 @@ private: }; */ -/** - * gcc-4.7 throws what appears to be some false positive uninitialized - * warnings for the members of the MediumLarge struct. So, mute them here. - */ -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wuninitialized" -#endif - /** * This is the core of the string. The code should work on 32- and - * 64-bit architectures and with any Char size. Porting to big endian - * architectures would require some changes. + * 64-bit and both big- and little-endian architectures with any + * Char size. 
* * The storage is selected as follows (assuming we store one-byte * characters on a 64-bit machine): (a) "small" strings between 0 and @@ -317,91 +281,94 @@ private: * reference-counted and copied lazily. the reference count is * allocated right before the character array. * - * The discriminator between these three strategies sits in the two - * most significant bits of the rightmost char of the storage. If - * neither is set, then the string is small (and its length sits in - * the lower-order bits of that rightmost character). If the MSb is - * set, the string is medium width. If the second MSb is set, then the - * string is large. + * The discriminator between these three strategies sits in two + * bits of the rightmost char of the storage. If neither is set, then the + * string is small (and its length sits in the lower-order bits on + * little-endian or the high-order bits on big-endian of that + * rightmost character). If the MSb is set, the string is medium width. + * If the second MSb is set, then the string is large. On little-endian, + * these 2 bits are the 2 MSbs of MediumLarge::capacity_, while on + * big-endian, these 2 bits are the 2 LSbs. This keeps both little-endian + * and big-endian fbstring_core equivalent with merely different ops used + * to extract capacity/category. */ template class fbstring_core { +protected: +// It's MSVC, so we just have to guess ... and allow an override +#ifdef _MSC_VER +# ifdef FOLLY_ENDIAN_BE + static constexpr auto kIsLittleEndian = false; +# else + static constexpr auto kIsLittleEndian = true; +# endif +#else + static constexpr auto kIsLittleEndian = + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; +#endif public: - fbstring_core() noexcept { - // Only initialize the tag, will set the MSBs (i.e. 
the small - // string size) to zero too - ml_.capacity_ = maxSmallSize << (8 * (sizeof(size_t) - sizeof(Char))); - // or: setSmallSize(0); - writeTerminator(); - assert(category() == isSmall && size() == 0); - } + fbstring_core() noexcept { reset(); } fbstring_core(const fbstring_core & rhs) { assert(&rhs != this); // Simplest case first: small strings are bitblitted - if (rhs.category() == isSmall) { + if (rhs.category() == Category::isSmall) { static_assert(offsetof(MediumLarge, data_) == 0, "fbstring layout failure"); static_assert(offsetof(MediumLarge, size_) == sizeof(ml_.data_), "fbstring layout failure"); static_assert(offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_), "fbstring layout failure"); - const size_t size = rhs.smallSize(); - if (size == 0) { - ml_.capacity_ = rhs.ml_.capacity_; - writeTerminator(); - } else { - // Just write the whole thing, don't look at details. In - // particular we need to copy capacity anyway because we want - // to set the size (don't forget that the last character, - // which stores a short string's length, is shared with the - // ml_.capacity field). - ml_ = rhs.ml_; - } - assert(category() == isSmall && this->size() == rhs.size()); - } else if (rhs.category() == isLarge) { + // Just write the whole thing, don't look at details. In + // particular we need to copy capacity anyway because we want + // to set the size (don't forget that the last character, + // which stores a short string's length, is shared with the + // ml_.capacity field). + ml_ = rhs.ml_; + assert(category() == Category::isSmall && this->size() == rhs.size()); + } else if (rhs.category() == Category::isLarge) { // Large strings are just refcounted ml_ = rhs.ml_; RefCounted::incrementRefs(ml_.data_); - assert(category() == isLarge && size() == rhs.size()); + assert(category() == Category::isLarge && size() == rhs.size()); } else { // Medium strings are copied eagerly. Don't forget to allocate // one extra Char for the null terminator. 
auto const allocSize = goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char)); ml_.data_ = static_cast(checkedMalloc(allocSize)); + // Also copies terminator. fbstring_detail::pod_copy(rhs.ml_.data_, - // 1 for terminator rhs.ml_.data_ + rhs.ml_.size_ + 1, ml_.data_); - // No need for writeTerminator() here, we copied one extra - // element just above. ml_.size_ = rhs.ml_.size_; - ml_.capacity_ = (allocSize / sizeof(Char) - 1) | isMedium; - assert(category() == isMedium); + ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); + assert(category() == Category::isMedium); } assert(size() == rhs.size()); assert(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0); } fbstring_core(fbstring_core&& goner) noexcept { - if (goner.category() == isSmall) { - // Just copy, leave the goner in peace - new(this) fbstring_core(goner.small_, goner.smallSize()); - } else { - // Take goner's guts - ml_ = goner.ml_; - // Clean goner's carcass - goner.setSmallSize(0); - } + // Take goner's guts + ml_ = goner.ml_; + // Clean goner's carcass + goner.reset(); } - // NOTE(agallagher): The word-aligned copy path copies bytes which are - // outside the range of the string, and makes address sanitizer unhappy, - // so just disable it on this function. 
- fbstring_core(const Char *const data, const size_t size) - FBSTRING_DISABLE_ADDRESS_SANITIZER { + fbstring_core(const Char *const data, + const size_t size, + bool disableSSO = FBSTRING_DISABLE_SSO) { +#ifndef NDEBUG +#ifndef _LIBSTDCXX_FBSTRING + SCOPE_EXIT { + assert(this->size() == size); + assert(size == 0 || memcmp(this->data(), data, size * sizeof(Char)) == 0); + }; +#endif +#endif + // Simplest case first: small strings are bitblitted - if (size <= maxSmallSize) { + if (!disableSSO && size <= maxSmallSize) { // Layout is: Char* data_, size_t size_, size_t capacity_ static_assert(sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t), "fbstring has unexpected size"); @@ -413,54 +380,58 @@ public: // If data is aligned, use fast word-wise copying. Otherwise, // use conservative memcpy. - if (reinterpret_cast(data) & (sizeof(size_t) - 1)) { - fbstring_detail::pod_copy(data, data + size, small_); - } else { - // Copy one word (64 bits) at a time + // The word-wise path reads bytes which are outside the range of + // the string, and makes ASan unhappy, so we disable it when + // compiling with ASan. +#ifndef FBSTRING_SANITIZE_ADDRESS + if ((reinterpret_cast(data) & (sizeof(size_t) - 1)) == 0) { const size_t byteSize = size * sizeof(Char); - if (byteSize > 2 * sizeof(size_t)) { - // Copy three words + constexpr size_t wordWidth = sizeof(size_t); + switch ((byteSize + wordWidth - 1) / wordWidth) { // Number of words. + case 3: ml_.capacity_ = reinterpret_cast(data)[2]; - copyTwo: + case 2: ml_.size_ = reinterpret_cast(data)[1]; - copyOne: + case 1: ml_.data_ = *reinterpret_cast(const_cast(data)); - } else if (byteSize > sizeof(size_t)) { - // Copy two words - goto copyTwo; - } else if (size > 0) { - // Copy one word - goto copyOne; + case 0: + break; + } + } else +#endif + { + if (size != 0) { + fbstring_detail::pod_copy(data, data + size, small_); } } setSmallSize(size); - } else if (size <= maxMediumSize) { - // Medium strings are allocated normally. 
Don't forget to - // allocate one extra Char for the terminating null. - auto const allocSize = goodMallocSize((1 + size) * sizeof(Char)); - ml_.data_ = static_cast(checkedMalloc(allocSize)); - fbstring_detail::pod_copy(data, data + size, ml_.data_); - ml_.size_ = size; - ml_.capacity_ = (allocSize / sizeof(Char) - 1) | isMedium; } else { - // Large strings are allocated differently - size_t effectiveCapacity = size; - auto const newRC = RefCounted::create(data, & effectiveCapacity); - ml_.data_ = newRC->data_; - ml_.size_ = size; - ml_.capacity_ = effectiveCapacity | isLarge; + if (size <= maxMediumSize) { + // Medium strings are allocated normally. Don't forget to + // allocate one extra Char for the terminating null. + auto const allocSize = goodMallocSize((1 + size) * sizeof(Char)); + ml_.data_ = static_cast(checkedMalloc(allocSize)); + fbstring_detail::pod_copy(data, data + size, ml_.data_); + ml_.size_ = size; + ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); + } else { + // Large strings are allocated differently + size_t effectiveCapacity = size; + auto const newRC = RefCounted::create(data, & effectiveCapacity); + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.setCapacity(effectiveCapacity, Category::isLarge); + } + ml_.data_[size] = '\0'; } - writeTerminator(); - assert(this->size() == size); - assert(memcmp(this->data(), data, size * sizeof(Char)) == 0); } ~fbstring_core() noexcept { auto const c = category(); - if (c == isSmall) { + if (c == Category::isSmall) { return; } - if (c == isMedium) { + if (c == Category::isMedium) { free(ml_.data_); return; } @@ -485,11 +456,11 @@ public: ml_.data_ = data; ml_.size_ = size; // Don't forget about null terminator - ml_.capacity_ = (allocatedSize - 1) | isMedium; + ml_.setCapacity(allocatedSize - 1, Category::isMedium); } else { // No need for the memory free(data); - setSmallSize(0); + reset(); } } @@ -510,66 +481,49 @@ public: Char * mutable_data() { auto const c = category(); - if (c 
== isSmall) { + if (c == Category::isSmall) { return small_; } - assert(c == isMedium || c == isLarge); - if (c == isLarge && RefCounted::refs(ml_.data_) > 1) { + assert(c == Category::isMedium || c == Category::isLarge); + if (c == Category::isLarge && RefCounted::refs(ml_.data_) > 1) { // Ensure unique. size_t effectiveCapacity = ml_.capacity(); auto const newRC = RefCounted::create(& effectiveCapacity); // If this fails, someone placed the wrong capacity in an // fbstring. assert(effectiveCapacity >= ml_.capacity()); + // Also copies terminator. fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_); RefCounted::decrementRefs(ml_.data_); ml_.data_ = newRC->data_; - // No need to call writeTerminator(), we have + 1 above. } return ml_.data_; } const Char * c_str() const { auto const c = category(); -#ifdef FBSTRING_PERVERSE - if (c == isSmall) { - assert(small_[smallSize()] == TERMINATOR || smallSize() == maxSmallSize - || small_[smallSize()] == '\0'); - small_[smallSize()] = '\0'; - return small_; - } - assert(c == isMedium || c == isLarge); - assert(ml_.data_[ml_.size_] == TERMINATOR || ml_.data_[ml_.size_] == '\0'); - ml_.data_[ml_.size_] = '\0'; -#elif defined(FBSTRING_CONSERVATIVE) - if (c == isSmall) { + if (c == Category::isSmall) { assert(small_[smallSize()] == '\0'); return small_; } - assert(c == isMedium || c == isLarge); + assert(c == Category::isMedium || c == Category::isLarge); assert(ml_.data_[ml_.size_] == '\0'); -#else - if (c == isSmall) { - small_[smallSize()] = '\0'; - return small_; - } - assert(c == isMedium || c == isLarge); - ml_.data_[ml_.size_] = '\0'; -#endif return ml_.data_; } void shrink(const size_t delta) { - if (category() == isSmall) { + if (category() == Category::isSmall) { // Check for underflow assert(delta <= smallSize()); setSmallSize(smallSize() - delta); - } else if (category() == isMedium || RefCounted::refs(ml_.data_) == 1) { + } else if (category() == Category::isMedium || + 
RefCounted::refs(ml_.data_) == 1) { // Medium strings and unique large strings need no special // handling. assert(ml_.size_ >= delta); ml_.size_ -= delta; + ml_.data_[ml_.size_] = '\0'; } else { assert(ml_.size_ >= delta); // Shared large string, must make unique. This is because of the @@ -579,13 +533,11 @@ public: fbstring_core(ml_.data_, ml_.size_ - delta).swap(*this); } // No need to write the terminator. - return; } - writeTerminator(); } - void reserve(size_t minCapacity) { - if (category() == isLarge) { + void reserve(size_t minCapacity, bool disableSSO = FBSTRING_DISABLE_SSO) { + if (category() == Category::isLarge) { // Ensure unique if (RefCounted::refs(ml_.data_) > 1) { // We must make it unique regardless; in-place reallocation is @@ -595,13 +547,12 @@ public: // call to reserve. minCapacity = std::max(minCapacity, ml_.capacity()); auto const newRC = RefCounted::create(& minCapacity); + // Also copies terminator. fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_); - // Done with the old data. No need to call writeTerminator(), - // we have + 1 above. RefCounted::decrementRefs(ml_.data_); ml_.data_ = newRC->data_; - ml_.capacity_ = minCapacity | isLarge; + ml_.setCapacity(minCapacity, Category::isLarge); // size remains unchanged } else { // String is not shared, so let's try to realloc (if needed) @@ -611,12 +562,11 @@ public: RefCounted::reallocate(ml_.data_, ml_.size_, ml_.capacity(), minCapacity); ml_.data_ = newRC->data_; - ml_.capacity_ = minCapacity | isLarge; - writeTerminator(); + ml_.setCapacity(minCapacity, Category::isLarge); } assert(capacity() >= minCapacity); } - } else if (category() == isMedium) { + } else if (category() == Category::isMedium) { // String is not shared if (minCapacity <= ml_.capacity()) { return; // nothing to do, there's enough room @@ -625,123 +575,102 @@ public: // Keep the string at medium size. Don't forget to allocate // one extra Char for the terminating null. 
size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); + // Also copies terminator. ml_.data_ = static_cast( smartRealloc( ml_.data_, - ml_.size_ * sizeof(Char), + (ml_.size_ + 1) * sizeof(Char), (ml_.capacity() + 1) * sizeof(Char), capacityBytes)); - writeTerminator(); - ml_.capacity_ = (capacityBytes / sizeof(Char) - 1) | isMedium; + ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium); } else { // Conversion from medium to large string fbstring_core nascent; // Will recurse to another branch of this function nascent.reserve(minCapacity); nascent.ml_.size_ = ml_.size_; - fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_, + // Also copies terminator. + fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, nascent.ml_.data_); nascent.swap(*this); - writeTerminator(); assert(capacity() >= minCapacity); } } else { - assert(category() == isSmall); - if (minCapacity > maxMediumSize) { - // large - auto const newRC = RefCounted::create(& minCapacity); - auto const size = smallSize(); - fbstring_detail::pod_copy(small_, small_ + size + 1, newRC->data_); - // No need for writeTerminator(), we wrote it above with + 1. - ml_.data_ = newRC->data_; - ml_.size_ = size; - ml_.capacity_ = minCapacity | isLarge; - assert(capacity() >= minCapacity); - } else if (minCapacity > maxSmallSize) { + assert(category() == Category::isSmall); + if (!disableSSO && minCapacity <= maxSmallSize) { + // small + // Nothing to do, everything stays put + } else if (minCapacity <= maxMediumSize) { // medium // Don't forget to allocate one extra Char for the terminating null auto const allocSizeBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); - auto const data = static_cast(checkedMalloc(allocSizeBytes)); + auto const pData = static_cast(checkedMalloc(allocSizeBytes)); auto const size = smallSize(); - fbstring_detail::pod_copy(small_, small_ + size + 1, data); - // No need for writeTerminator(), we wrote it above with + 1. 
- ml_.data_ = data; + // Also copies terminator. + fbstring_detail::pod_copy(small_, small_ + size + 1, pData); + ml_.data_ = pData; ml_.size_ = size; - ml_.capacity_ = (allocSizeBytes / sizeof(Char) - 1) | isMedium; + ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium); } else { - // small - // Nothing to do, everything stays put + // large + auto const newRC = RefCounted::create(& minCapacity); + auto const size = smallSize(); + // Also copies terminator. + fbstring_detail::pod_copy(small_, small_ + size + 1, newRC->data_); + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.setCapacity(minCapacity, Category::isLarge); + assert(capacity() >= minCapacity); } } assert(capacity() >= minCapacity); } - Char * expand_noinit(const size_t delta) { + Char * expand_noinit(const size_t delta, + bool expGrowth = false, + bool disableSSO = FBSTRING_DISABLE_SSO) { // Strategy is simple: make room, then change size assert(capacity() >= size()); size_t sz, newSz; - if (category() == isSmall) { + if (category() == Category::isSmall) { sz = smallSize(); newSz = sz + delta; - if (newSz <= maxSmallSize) { + if (!disableSSO && FBSTRING_LIKELY(newSz <= maxSmallSize)) { setSmallSize(newSz); - writeTerminator(); return small_ + sz; } - reserve(newSz); + reserve(expGrowth ? std::max(newSz, 2 * maxSmallSize) : newSz); } else { sz = ml_.size_; - newSz = ml_.size_ + delta; - if (newSz > capacity()) { - reserve(newSz); + newSz = sz + delta; + if (FBSTRING_UNLIKELY(newSz > capacity())) { + // ensures not shared + reserve(expGrowth ? 
std::max(newSz, 1 + capacity() * 3 / 2) : newSz); } } assert(capacity() >= newSz); // Category can't be small - we took care of that above - assert(category() == isMedium || category() == isLarge); + assert(category() == Category::isMedium || category() == Category::isLarge); ml_.size_ = newSz; - writeTerminator(); + ml_.data_[newSz] = '\0'; assert(size() == newSz); return ml_.data_ + sz; } void push_back(Char c) { - assert(capacity() >= size()); - size_t sz; - if (category() == isSmall) { - sz = smallSize(); - if (sz < maxSmallSize) { - setSmallSize(sz + 1); - small_[sz] = c; - writeTerminator(); - return; - } - reserve(maxSmallSize * 2); - } else { - sz = ml_.size_; - if (sz == capacity()) { // always true for isShared() - reserve(1 + sz * 3 / 2); // ensures not shared - } - } - assert(!isShared()); - assert(capacity() >= sz + 1); - // Category can't be small - we took care of that above - assert(category() == isMedium || category() == isLarge); - ml_.size_ = sz + 1; - ml_.data_[sz] = c; - writeTerminator(); + *expand_noinit(1, /* expGrowth = */ true) = c; } size_t size() const { - return category() == isSmall ? smallSize() : ml_.size_; + return category() == Category::isSmall ? smallSize() : ml_.size_; } size_t capacity() const { switch (category()) { - case isSmall: + case Category::isSmall: return maxSmallSize; - case isLarge: + case Category::isLarge: // For large-sized strings, a multi-referenced chunk has no // available capacity. This is because any attempt to append // data would trigger a new allocation. 
@@ -752,41 +681,22 @@ public: } bool isShared() const { - return category() == isLarge && RefCounted::refs(ml_.data_) > 1; - } - -#ifdef FBSTRING_PERVERSE - enum { TERMINATOR = '^' }; -#else - enum { TERMINATOR = '\0' }; -#endif - - void writeTerminator() { -#if defined(FBSTRING_PERVERSE) || defined(FBSTRING_CONSERVATIVE) - if (category() == isSmall) { - const auto s = smallSize(); - if (s != maxSmallSize) { - small_[s] = TERMINATOR; - } - } else { - ml_.data_[ml_.size_] = TERMINATOR; - } -#endif + return category() == Category::isLarge && RefCounted::refs(ml_.data_) > 1; } private: // Disabled fbstring_core & operator=(const fbstring_core & rhs); - struct MediumLarge { - Char * data_; - size_t size_; - size_t capacity_; - - size_t capacity() const { - return capacity_ & capacityExtractMask; - } - }; + // Equivalent to setSmallSize(0) but a few ns faster in + // microbenchmarks. + void reset() { + ml_.capacity_ = kIsLittleEndian + ? maxSmallSize << (8 * (sizeof(size_t) - sizeof(Char))) + : maxSmallSize << 2; + small_[0] = '\0'; + assert(category() == Category::isSmall && size() == 0); + } struct RefCounted { std::atomic refCount_; @@ -855,39 +765,68 @@ private: } }; + typedef std::conditional::type + category_type; + + enum class Category : category_type { + isSmall = 0, + isMedium = kIsLittleEndian + ? sizeof(size_t) == 4 ? 0x80000000 : 0x8000000000000000 + : 0x2, + isLarge = kIsLittleEndian + ? sizeof(size_t) == 4 ? 0x40000000 : 0x4000000000000000 + : 0x1, + }; + + Category category() const { + // works for both big-endian and little-endian + return static_cast(ml_.capacity_ & categoryExtractMask); + } + + struct MediumLarge { + Char * data_; + size_t size_; + size_t capacity_; + + size_t capacity() const { + return kIsLittleEndian + ? capacity_ & capacityExtractMask + : capacity_ >> 2; + } + + void setCapacity(size_t cap, Category cat) { + capacity_ = kIsLittleEndian + ? 
cap | static_cast(cat) + : (cap << 2) | static_cast(cat); + } + }; + union { - mutable Char small_[sizeof(MediumLarge) / sizeof(Char)]; - mutable MediumLarge ml_; + Char small_[sizeof(MediumLarge) / sizeof(Char)]; + MediumLarge ml_; }; - enum { + enum : size_t { lastChar = sizeof(MediumLarge) - 1, maxSmallSize = lastChar / sizeof(Char), maxMediumSize = 254 / sizeof(Char), // coincides with the small // bin size in dlmalloc - categoryExtractMask = sizeof(size_t) == 4 ? 0xC0000000 : 0xC000000000000000, - capacityExtractMask = ~categoryExtractMask, + categoryExtractMask = kIsLittleEndian + ? sizeof(size_t) == 4 ? 0xC0000000 : 0xC000000000000000 + : 0x3, + capacityExtractMask = kIsLittleEndian + ? ~categoryExtractMask + : 0x0 /*unused*/, }; static_assert(!(sizeof(MediumLarge) % sizeof(Char)), "Corrupt memory layout for fbstring."); - enum Category { - isSmall = 0, - isMedium = sizeof(size_t) == 4 ? 0x80000000 : 0x8000000000000000, - isLarge = sizeof(size_t) == 4 ? 0x40000000 : 0x4000000000000000, - }; - - Category category() const { - // Assumes little endian - return static_cast(ml_.capacity_ & categoryExtractMask); - } - size_t smallSize() const { - assert(category() == isSmall && - static_cast(small_[maxSmallSize]) - <= static_cast(maxSmallSize)); - return static_cast(maxSmallSize) - - static_cast(small_[maxSmallSize]); + assert(category() == Category::isSmall); + constexpr auto shift = kIsLittleEndian ? 0 : 2; + auto smallShifted = static_cast(small_[maxSmallSize]) >> shift; + assert(static_cast(maxSmallSize) >= smallShifted); + return static_cast(maxSmallSize) - smallShifted; } void setSmallSize(size_t s) { @@ -895,14 +834,13 @@ private: // so don't assume anything about the previous value of // small_[maxSmallSize]. assert(s <= maxSmallSize); - small_[maxSmallSize] = maxSmallSize - s; + constexpr auto shift = kIsLittleEndian ? 
0 : 2; + small_[maxSmallSize] = (maxSmallSize - s) << shift; + small_[s] = '\0'; + assert(category() == Category::isSmall && size() == s); } }; -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic pop -#endif - #ifndef _LIBSTDCXX_FBSTRING /** * Dummy fbstring core that uses an actual std::string. This doesn't @@ -989,7 +927,7 @@ class basic_fbstring { size() <= max_size() && capacity() <= max_size() && size() <= capacity() && - (begin()[size()] == Storage::TERMINATOR || begin()[size()] == '\0'); + begin()[size()] == '\0'; } struct Invariant; @@ -1037,6 +975,7 @@ public: > const_reverse_iterator; static const size_type npos; // = size_type(-1) + typedef std::true_type IsRelocatable; private: static void procrustes(size_type& n, size_type nmax) { @@ -1045,7 +984,23 @@ private: public: // C++11 21.4.2 construct/copy/destroy - explicit basic_fbstring(const A& a = A()) noexcept { + + // Note: while the following two constructors can be (and previously were) + // collapsed into one constructor written this way: + // + // explicit basic_fbstring(const A& a = A()) noexcept { } + // + // This can cause Clang (at least version 3.7) to fail with the error: + // "chosen constructor is explicit in copy-initialization ... + // in implicit initialization of field '(x)' with omitted initializer" + // + // if used in a struct which is default-initialized. Hence the split into + // these two separate constructors. 
+ + basic_fbstring() noexcept : basic_fbstring(A()) { + } + + explicit basic_fbstring(const A&) noexcept { } basic_fbstring(const basic_fbstring& str) @@ -1064,36 +1019,35 @@ public: } #endif - basic_fbstring(const basic_fbstring& str, size_type pos, - size_type n = npos, const A& a = A()) { + basic_fbstring(const basic_fbstring& str, + size_type pos, + size_type n = npos, + const A& /* a */ = A()) { assign(str, pos, n); } - /* implicit */ basic_fbstring(const value_type* s, const A& a = A()) + /* implicit */ basic_fbstring(const value_type* s, const A& /*a*/ = A()) : store_(s, s ? traits_type::length(s) - : [] { - std::__throw_logic_error( - "basic_fbstring: null pointer initializer not valid"); - return 0; - }()) { + : (std::__throw_logic_error( + "basic_fbstring: null pointer initializer not valid"), + 0)) { } - basic_fbstring(const value_type* s, size_type n, const A& a = A()) + basic_fbstring(const value_type* s, size_type n, const A& /*a*/ = A()) : store_(s, n) { } - basic_fbstring(size_type n, value_type c, const A& a = A()) { - auto const data = store_.expand_noinit(n); - fbstring_detail::pod_fill(data, data + n, c); - store_.writeTerminator(); + basic_fbstring(size_type n, value_type c, const A& /*a*/ = A()) { + auto const pData = store_.expand_noinit(n); + fbstring_detail::pod_fill(pData, pData + n, c); } template basic_fbstring(InIt begin, InIt end, typename std::enable_if< !std::is_same::type, - value_type*>::value, const A>::type & a = A()) { + value_type*>::value, const A>::type & /*a*/ = A()) { assign(begin, end); } @@ -1117,6 +1071,8 @@ public: } basic_fbstring& operator=(const basic_fbstring& lhs) { + Invariant checker(*this); + if (FBSTRING_UNLIKELY(&lhs == this)) { return *this; } @@ -1124,13 +1080,15 @@ public: auto const srcSize = lhs.size(); if (capacity() >= srcSize && !store_.isShared()) { // great, just copy the contents - if (oldSize < srcSize) + if (oldSize < srcSize) { store_.expand_noinit(srcSize - oldSize); - else + } else { 
store_.shrink(oldSize - srcSize); + } assert(size() == srcSize); - fbstring_detail::pod_copy(lhs.begin(), lhs.end(), begin()); - store_.writeTerminator(); + auto srcData = lhs.data(); + fbstring_detail::pod_copy( + srcData, srcData + srcSize, store_.mutable_data()); } else { // need to reallocate, so we may as well create a brand new string basic_fbstring(lhs).swap(*this); @@ -1174,6 +1132,8 @@ public: } basic_fbstring& operator=(value_type c) { + Invariant checker(*this); + if (empty()) { store_.expand_noinit(1); } else if (store_.isShared()) { @@ -1182,8 +1142,7 @@ public: } else { store_.shrink(size() - 1); } - *store_.mutable_data() = c; - store_.writeTerminator(); + front() = c; return *this; } @@ -1257,29 +1216,15 @@ public: } void resize(const size_type n, const value_type c = value_type()) { + Invariant checker(*this); + auto size = this->size(); if (n <= size) { store_.shrink(size - n); } else { - // Do this in two steps to minimize slack memory copied (see - // smartRealloc). - auto const capacity = this->capacity(); - assert(capacity >= size); - if (size < capacity) { - auto delta = std::min(n, capacity) - size; - store_.expand_noinit(delta); - fbstring_detail::pod_fill(begin() + size, end(), c); - size += delta; - if (size == n) { - store_.writeTerminator(); - return; - } - assert(size < n); - } auto const delta = n - size; - store_.expand_noinit(delta); - fbstring_detail::pod_fill(end() - delta, end(), c); - store_.writeTerminator(); + auto pData = store_.expand_noinit(delta); + fbstring_detail::pod_fill(pData, pData + delta, c); } assert(this->size() == n); } @@ -1305,14 +1250,10 @@ public: // C++11 21.4.5 element access: const_reference operator[](size_type pos) const { - return *(c_str() + pos); + return *(begin() + pos); } reference operator[](size_type pos) { - if (pos == size()) { - // Just call c_str() to make sure '\0' is present - c_str(); - } return *(begin() + pos); } @@ -1363,10 +1304,8 @@ public: } basic_fbstring& append(const value_type* 
s, size_type n) { -#ifndef NDEBUG Invariant checker(*this); - (void) checker; -#endif + if (FBSTRING_UNLIKELY(!n)) { // Unlikely but must be done return *this; @@ -1387,15 +1326,10 @@ public: // Restore the source s = data() + offset; } - // Warning! Repeated appends with short strings may actually incur - // practically quadratic performance. Avoid that by pushing back - // the first character (which ensures exponential growth) and then - // appending the rest normally. Worst case the append may incur a - // second allocation but that will be rare. - push_back(*s++); - --n; - memcpy(store_.expand_noinit(n), s, n * sizeof(value_type)); - assert(size() == oldSize + n + 1); + + fbstring_detail::pod_copy( + s, s + n, store_.expand_noinit(n, /* expGrowth = */ true)); + assert(size() == oldSize + n); return *this; } @@ -1404,7 +1338,9 @@ public: } basic_fbstring& append(size_type n, value_type c) { - resize(size() + n, c); + Invariant checker(*this); + auto pData = store_.expand_noinit(n, /* expGrowth = */ true); + fbstring_detail::pod_fill(pData, pData + n, c); return *this; } @@ -1441,18 +1377,20 @@ public: basic_fbstring& assign(const value_type* s, const size_type n) { Invariant checker(*this); - (void) checker; - if (size() >= n) { - std::copy(s, s + n, begin()); + + // s can alias this, we need to use pod_move. 
+ if (n == 0) { + resize(0); + } else if (size() >= n) { + fbstring_detail::pod_move(s, s + n, store_.mutable_data()); resize(n); assert(size() == n); } else { const value_type *const s2 = s + size(); - std::copy(s, s2, begin()); + fbstring_detail::pod_move(s, s2, store_.mutable_data()); append(s2, n - size()); assert(size() == n); } - store_.writeTerminator(); assert(size() == n); return *this; } @@ -1503,37 +1441,66 @@ public: return begin() + pos; } +#ifndef _LIBSTDCXX_FBSTRING + private: + typedef std::basic_istream istream_type; + + public: + friend inline istream_type& getline(istream_type& is, + basic_fbstring& str, + value_type delim) { + Invariant checker(str); + + str.clear(); + size_t size = 0; + while (true) { + size_t avail = str.capacity() - size; + // fbstring has 1 byte extra capacity for the null terminator, + // and getline null-terminates the read string. + is.getline(str.store_.expand_noinit(avail), avail + 1, delim); + size += is.gcount(); + + if (is.bad() || is.eof() || !is.fail()) { + // Done by either failure, end of file, or normal read. + if (!is.bad() && !is.eof()) { + --size; // gcount() also accounts for the delimiter. + } + str.resize(size); + break; + } + + assert(size == str.size()); + assert(size == str.capacity()); + // Start at minimum allocation 63 + terminator = 64. + str.reserve(std::max(63, 3 * size / 2)); + // Clear the error so we can continue reading. 
+ is.clear(); + } + return is; + } + + friend inline istream_type& getline(istream_type& is, basic_fbstring& str) { + return getline(is, str, '\n'); + } +#endif + private: template class Selector {}; - iterator insertImplDiscr(const_iterator p, + iterator insertImplDiscr(const_iterator i, size_type n, value_type c, Selector<1>) { Invariant checker(*this); - (void) checker; - auto const pos = p - begin(); - assert(p >= begin() && p <= end()); - if (capacity() - size() < n) { - const size_type sz = p - begin(); - reserve(size() + n); - p = begin() + sz; - } - const iterator oldEnd = end(); - if (n < size_type(oldEnd - p)) { - append(oldEnd - n, oldEnd); - //std::copy( - // reverse_iterator(oldEnd - n), - // reverse_iterator(p), - // reverse_iterator(oldEnd)); - fbstring_detail::pod_move(&*p, &*oldEnd - n, - begin() + pos + n); - std::fill(begin() + pos, begin() + pos + n, c); - } else { - append(n - (end() - p), c); - append(iterator(p), oldEnd); - std::fill(iterator(p), oldEnd, c); - } - store_.writeTerminator(); - return begin() + pos; + + assert(i >= begin() && i <= end()); + const size_type pos = i - begin(); + + auto oldSize = size(); + store_.expand_noinit(n, /* expGrowth = */ true); + auto b = begin(); + fbstring_detail::pod_move(b + pos, b + oldSize, b + pos + n); + fbstring_detail::pod_fill(b + pos, b + pos + n, c); + + return b + pos; } template @@ -1545,43 +1512,23 @@ private: template iterator insertImpl(const_iterator i, - FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { + FwdIterator s1, + FwdIterator s2, + std::forward_iterator_tag) { Invariant checker(*this); - (void) checker; + + assert(i >= begin() && i <= end()); const size_type pos = i - begin(); - const typename std::iterator_traits::difference_type n2 = - std::distance(s1, s2); - assert(n2 >= 0); - using namespace fbstring_detail; - assert(pos <= size()); - - const typename std::iterator_traits::difference_type maxn2 = - capacity() - size(); - if (maxn2 < n2) { - // realloc the 
string - reserve(size() + n2); - i = begin() + pos; - } - if (pos + n2 <= size()) { - const iterator tailBegin = end() - n2; - store_.expand_noinit(n2); - fbstring_detail::pod_copy(tailBegin, tailBegin + n2, end() - n2); - std::copy(const_reverse_iterator(tailBegin), const_reverse_iterator(i), - reverse_iterator(tailBegin + n2)); - std::copy(s1, s2, begin() + pos); - } else { - FwdIterator t = s1; - const size_type old_size = size(); - std::advance(t, old_size - pos); - const size_t newElems = std::distance(t, s2); - store_.expand_noinit(n2); - std::copy(t, s2, begin() + old_size); - fbstring_detail::pod_copy(data() + pos, data() + old_size, - begin() + old_size + newElems); - std::copy(s1, t, begin() + pos); - } - store_.writeTerminator(); - return begin() + pos; + auto n = std::distance(s1, s2); + assert(n >= 0); + + auto oldSize = size(); + store_.expand_noinit(n, /* expGrowth = */ true); + auto b = begin(); + fbstring_detail::pod_move(b + pos, b + oldSize, b + pos + n); + std::copy(s1, s2, b + pos); + + return b + pos; } template @@ -1611,7 +1558,7 @@ public: basic_fbstring& erase(size_type pos = 0, size_type n = npos) { Invariant checker(*this); - (void) checker; + enforce(pos <= length(), std::__throw_out_of_range, ""); procrustes(n, length() - pos); std::copy(begin() + pos + n, end(), begin() + pos); @@ -1665,7 +1612,7 @@ public: basic_fbstring& replace(size_type pos, size_type n1, StrOrLength s_or_n2, NumOrChar n_or_c) { Invariant checker(*this); - (void) checker; + enforce(pos <= size(), std::__throw_out_of_range, ""); procrustes(n1, length() - pos); const iterator b = begin() + pos; @@ -1714,9 +1661,12 @@ private: } private: - template - bool replaceAliased(iterator i1, iterator i2, - FwdIterator s1, FwdIterator s2, std::false_type) { + template + bool replaceAliased(iterator /* i1 */, + iterator /* i2 */, + FwdIterator /* s1 */, + FwdIterator /* s2 */, + std::false_type) { return false; } @@ -1741,7 +1691,6 @@ private: void replaceImpl(iterator i1, 
iterator i2, FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { Invariant checker(*this); - (void) checker; // Handle aliased replace if (replaceAliased(i1, i2, s1, s2, @@ -1793,10 +1742,9 @@ public: enforce(pos <= size(), std::__throw_out_of_range, ""); procrustes(n, size() - pos); - fbstring_detail::pod_copy( - data() + pos, - data() + pos + n, - s); + if (n != 0) { + fbstring_detail::pod_copy(data() + pos, data() + pos + n, s); + } return n; } @@ -2021,11 +1969,18 @@ public: return find_last_not_of(&c, pos, 1); } - basic_fbstring substr(size_type pos = 0, size_type n = npos) const { + basic_fbstring substr(size_type pos = 0, size_type n = npos) const& { enforce(pos <= size(), std::__throw_out_of_range, ""); return basic_fbstring(data() + pos, std::min(n, size() - pos)); } + basic_fbstring substr(size_type pos = 0, size_type n = npos) && { + enforce(pos <= size(), std::__throw_out_of_range, ""); + erase(0, pos); + if (n < size()) resize(n); + return std::move(*this); + } + int compare(const basic_fbstring& str) const { // FIX due to Goncalo N M de Carvalho July 18, 2005 return compare(0, size(), str); @@ -2073,7 +2028,7 @@ private: }; // non-member functions -// C++11 21.4.8.1/2 +// C++11 21.4.8.1/1 template inline basic_fbstring operator+(const basic_fbstring& lhs, @@ -2115,24 +2070,45 @@ basic_fbstring operator+(basic_fbstring&& lhs, return std::move(lhs.append(rhs)); } +// C++11 21.4.8.1/5 template inline basic_fbstring operator+( - const typename basic_fbstring::value_type* lhs, + const E* lhs, const basic_fbstring& rhs) { // basic_fbstring result; - const typename basic_fbstring::size_type len = - basic_fbstring::traits_type::length(lhs); + const auto len = basic_fbstring::traits_type::length(lhs); result.reserve(len + rhs.size()); result.append(lhs, len).append(rhs); return result; } +// C++11 21.4.8.1/6 template inline basic_fbstring operator+( - typename basic_fbstring::value_type lhs, + const E* lhs, + basic_fbstring&& rhs) { + // + const auto 
len = basic_fbstring::traits_type::length(lhs); + if (rhs.capacity() >= len + rhs.size()) { + // Good, at least we don't need to reallocate + rhs.insert(rhs.begin(), lhs, lhs + len); + return rhs; + } + // Meh, no go. Do it by hand since we have len already. + basic_fbstring result; + result.reserve(len + rhs.size()); + result.append(lhs, len).append(rhs); + return result; +} + +// C++11 21.4.8.1/7 +template +inline +basic_fbstring operator+( + E lhs, const basic_fbstring& rhs) { basic_fbstring result; @@ -2142,11 +2118,29 @@ basic_fbstring operator+( return result; } +// C++11 21.4.8.1/8 +template +inline +basic_fbstring operator+( + E lhs, + basic_fbstring&& rhs) { + // + if (rhs.capacity() > rhs.size()) { + // Good, at least we don't need to reallocate + rhs.insert(rhs.begin(), lhs); + return rhs; + } + // Meh, no go. Forward to operator+(E, const&). + auto const& rhsC = rhs; + return lhs + rhsC; +} + +// C++11 21.4.8.1/9 template inline basic_fbstring operator+( const basic_fbstring& lhs, - const typename basic_fbstring::value_type* rhs) { + const E* rhs) { typedef typename basic_fbstring::size_type size_type; typedef typename basic_fbstring::traits_type traits_type; @@ -2158,11 +2152,22 @@ basic_fbstring operator+( return result; } +// C++11 21.4.8.1/10 +template +inline +basic_fbstring operator+( + basic_fbstring&& lhs, + const E* rhs) { + // + return std::move(lhs += rhs); +} + +// C++11 21.4.8.1/11 template inline basic_fbstring operator+( const basic_fbstring& lhs, - typename basic_fbstring::value_type rhs) { + E rhs) { basic_fbstring result; result.reserve(lhs.size() + 1); @@ -2171,6 +2176,16 @@ basic_fbstring operator+( return result; } +// C++11 21.4.8.1/12 +template +inline +basic_fbstring operator+( + basic_fbstring&& lhs, + E rhs) { + // + return std::move(lhs += rhs); +} + template inline bool operator==(const basic_fbstring& lhs, @@ -2305,20 +2320,20 @@ std::basic_istream< auto err = __ios_base::goodbit; if (sentry) { auto n = is.width(); - if (n 
== 0) { + if (n <= 0) { n = str.max_size(); } str.erase(); - auto got = is.rdbuf()->sgetc(); - for (; extracted != n && got != T::eof() && !isspace(got); ++extracted) { - // Whew. We get to store this guy + for (auto got = is.rdbuf()->sgetc(); extracted != size_t(n); ++extracted) { + if (got == T::eof()) { + err |= __ios_base::eofbit; + is.width(0); + break; + } + if (isspace(got)) break; str.push_back(got); got = is.rdbuf()->snextc(); } - if (got == T::eof()) { - err |= __ios_base::eofbit; - is.width(0); - } } if (!extracted) { err |= __ios_base::failbit; @@ -2357,63 +2372,15 @@ operator<<( os.setstate(std::ios_base::badbit | std::ios_base::failbit); } } +#elif defined(_MSC_VER) + // MSVC doesn't define __ostream_insert + os.write(str.data(), str.size()); #else std::__ostream_insert(os, str.data(), str.size()); #endif return os; } -#ifndef _LIBSTDCXX_FBSTRING - -template -inline -std::basic_istream::value_type, - typename basic_fbstring::traits_type>& -getline( - std::basic_istream::value_type, - typename basic_fbstring::traits_type>& is, - basic_fbstring& str, - typename basic_fbstring::value_type delim) { - // Use the nonstandard getdelim() - char * buf = nullptr; - size_t size = 0; - for (;;) { - // This looks quadratic but it really depends on realloc - auto const newSize = size + 128; - buf = static_cast(checkedRealloc(buf, newSize)); - is.getline(buf + size, newSize - size, delim); - if (is.bad() || is.eof() || !is.fail()) { - // done by either failure, end of file, or normal read - size += std::strlen(buf + size); - break; - } - // Here we have failed due to too short a buffer - // Minus one to discount the terminating '\0' - size = newSize - 1; - assert(buf[size] == 0); - // Clear the error so we can continue reading - is.clear(); - } - basic_fbstring result(buf, size, size + 1, - AcquireMallocatedString()); - result.swap(str); - return is; -} - -template -inline -std::basic_istream::value_type, - typename basic_fbstring::traits_type>& -getline( - 
std::basic_istream::value_type, - typename basic_fbstring::traits_type>& is, - basic_fbstring& str) { - // Just forward to the version with a delimiter - return getline(is, str, '\n'); -} - -#endif - template const typename basic_fbstring::size_type basic_fbstring::npos = @@ -2470,53 +2437,52 @@ _GLIBCXX_END_NAMESPACE_VERSION // // Handle interaction with different C++ standard libraries, which // expect these types to be in different namespaces. -namespace std { -template -struct hash > : private hash { - size_t operator()(const folly::basic_fbstring & s) const { - return hash::operator()(s.c_str()); - } -}; +#define FOLLY_FBSTRING_HASH1(T) \ + template <> \ + struct hash<::folly::basic_fbstring> { \ + size_t operator()(const ::folly::basic_fbstring& s) const { \ + return ::folly::hash::fnv32_buf(s.data(), s.size() * sizeof(T)); \ + } \ + }; -template <> -struct hash< ::folly::fbstring> { - size_t operator()(const ::folly::fbstring& s) const { - return ::folly::hash::fnv32_buf(s.data(), s.size()); - } -}; +// The C++11 standard says that these four are defined +#define FOLLY_FBSTRING_HASH \ + FOLLY_FBSTRING_HASH1(char) \ + FOLLY_FBSTRING_HASH1(char16_t) \ + FOLLY_FBSTRING_HASH1(char32_t) \ + FOLLY_FBSTRING_HASH1(wchar_t) -} +namespace std { + +FOLLY_FBSTRING_HASH + +} // namespace std #if FOLLY_HAVE_DEPRECATED_ASSOC #if defined(_GLIBCXX_SYMVER) && !defined(__BIONIC__) namespace __gnu_cxx { -template -struct hash > : private hash { - size_t operator()(const folly::basic_fbstring & s) const { - return hash::operator()(s.c_str()); - } -}; - -template <> -struct hash< ::folly::fbstring> { - size_t operator()(const ::folly::fbstring& s) const { - return ::folly::hash::fnv32_buf(s.data(), s.size()); - } -}; +FOLLY_FBSTRING_HASH -} +} // namespace __gnu_cxx #endif // _GLIBCXX_SYMVER && !__BIONIC__ #endif // FOLLY_HAVE_DEPRECATED_ASSOC +#undef FOLLY_FBSTRING_HASH +#undef FOLLY_FBSTRING_HASH1 + #endif // _LIBSTDCXX_FBSTRING #pragma GCC diagnostic pop -#undef 
FBSTRING_DISABLE_ADDRESS_SANITIZER +#undef FBSTRING_DISABLE_SSO +#undef FBSTRING_SANITIZE_ADDRESS #undef throw #undef FBSTRING_LIKELY #undef FBSTRING_UNLIKELY -#endif // FOLLY_BASE_FBSTRING_H_ +#ifdef FOLLY_DEFINED_NDEBUG_FOR_FBSTRING +#undef NDEBUG +#undef FOLLY_DEFINED_NDEBUG_FOR_FBSTRING +#endif // FOLLY_DEFINED_NDEBUG_FOR_FBSTRING