/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#endif
+// We define these here rather than including Likely.h to avoid
+// redefinition errors when fbstring is imported into libstdc++.
+#define FBSTRING_LIKELY(x) (__builtin_expect((x), 1))
+#define FBSTRING_UNLIKELY(x) (__builtin_expect((x), 0))
+
#include <atomic>
#include <limits>
#include <type_traits>
+// Ignore shadowing warnings within this file, so includers can use -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
+// FBString cannot use throw when replacing std::string, though it may still
+// use std::__throw_*
+#define throw FOLLY_FBSTRING_MAY_NOT_USE_THROW
+
#ifdef _LIBSTDCXX_FBSTRING
namespace std _GLIBCXX_VISIBILITY(default) {
_GLIBCXX_BEGIN_NAMESPACE_VERSION
namespace folly {
#endif
+// Different versions of gcc/clang spell the "do not instrument this
+// function" address sanitizer attribute differently. Unfortunately, the
+// attribute has issues when inlining is used, so inlining is disabled
+// as well.
+//
+// FBSTRING_DISABLE_ADDRESS_SANITIZER expands to the matching
+// __attribute__ when the translation unit is built with address
+// sanitizer enabled, and to nothing otherwise.
+#if defined(__clang__)
+# if __has_feature(address_sanitizer)
+#  if __has_attribute(__no_address_safety_analysis__)
+#   define FBSTRING_DISABLE_ADDRESS_SANITIZER \
+      __attribute__((__no_address_safety_analysis__, __noinline__))
+#  elif __has_attribute(__no_sanitize_address__)
+#   define FBSTRING_DISABLE_ADDRESS_SANITIZER \
+      __attribute__((__no_sanitize_address__, __noinline__))
+#  endif
+# endif
+#elif defined(__GNUC__) && defined(__SANITIZE_ADDRESS__) && \
+      (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+// Match gcc 4.8 (the first release with address sanitizer) and every
+// newer release. defined() keeps -Wundef quiet when ASan is off, and
+// no_sanitize_address is the spelling gcc documents for new code.
+# define FBSTRING_DISABLE_ADDRESS_SANITIZER \
+      __attribute__((__no_sanitize_address__, __noinline__))
+#endif
+#ifndef FBSTRING_DISABLE_ADDRESS_SANITIZER
+# define FBSTRING_DISABLE_ADDRESS_SANITIZER
+#endif
+
namespace fbstring_detail {
template <class InIt, class OutIt>
/*
* Lightly structured memcpy, simplifies copying PODs and introduces
- * some asserts
+ * some asserts. Unfortunately using this function may cause
+ * measurable overhead (presumably because it adjusts from a begin/end
+ * convention to a pointer/size convention, so it does some extra
+ * arithmetic even though the caller might have done the inverse
+ * adaptation outside).
+ *
+ * Copies the elements in [b, e) into the buffer starting at d. The
+ * asserts require e >= b and that the destination range does not
+ * overlap the source range.
*/
template <class Pod>
-inline Pod* pod_copy(const Pod* b, const Pod* e, Pod* d) {
+inline void pod_copy(const Pod* b, const Pod* e, Pod* d) {
assert(e >= b);
assert(d >= e || d + (e - b) <= b);
- const size_t s = e - b;
- std::memcpy(d, b, s * sizeof(*b));
- return d + s;
+ memcpy(d, b, (e - b) * sizeof(Pod));
}
/*
void shrink(size_t delta);
// Expands the string by delta characters (i.e. after this call
// size() will report the old size() plus delta) but without
- // initializing the expanded region. The caller is expected to fill
- // the expanded area appropriately.
- void expand_noinit(size_t delta);
+ // initializing the expanded region. Returns a pointer to the memory
+ // to be initialized (the beginning of the expanded portion). The
+ // caller is expected to fill the expanded area appropriately.
+ Char* expand_noinit(size_t delta);
// Expands the string by one character and sets the last character
// to c.
void push_back(Char c);
};
*/
+/**
+ * gcc-4.7 emits what appear to be false-positive -Wuninitialized
+ * warnings for the members of the MediumLarge struct, so mute them here.
+ */
+#if defined(__GNUC__) && !defined(__clang__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wuninitialized"
+#endif
+
/**
* This is the core of the string. The code should work on 32- and
* 64-bit architectures and with any Char size. Porting to big endian
*/
template <class Char> class fbstring_core {
public:
- fbstring_core() {
+ fbstring_core() noexcept {
// Only initialize the tag, will set the MSBs (i.e. the small
// string size) to zero too
- ml_.capacity_ = maxSmallSize << (8 * (sizeof(size_t) - 1));
+ ml_.capacity_ = maxSmallSize << (8 * (sizeof(size_t) - sizeof(Char)));
// or: setSmallSize(0);
writeTerminator();
assert(category() == isSmall && size() == 0);
assert(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0);
}
- fbstring_core(fbstring_core&& goner) {
+ fbstring_core(fbstring_core&& goner) noexcept {
if (goner.category() == isSmall) {
// Just copy, leave the goner in peace
new(this) fbstring_core(goner.small_, goner.smallSize());
}
}
- fbstring_core(const Char *const data, const size_t size) {
+ // NOTE(agallagher): The word-aligned copy path copies bytes which are
+ // outside the range of the string, and makes address sanitizer unhappy,
+ // so just disable it on this function.
+ fbstring_core(const Char *const data, const size_t size)
+ FBSTRING_DISABLE_ADDRESS_SANITIZER {
// Simplest case first: small strings are bitblitted
if (size <= maxSmallSize) {
// Layout is: Char* data_, size_t size_, size_t capacity_
assert(memcmp(this->data(), data, size * sizeof(Char)) == 0);
}
- ~fbstring_core() {
+ ~fbstring_core() noexcept {
auto const c = category();
if (c == isSmall) {
return;
}
// Snatches a previously mallocated string. The parameter "size"
- // is the size of the string, and the parameter "capacity" is the size
- // of the mallocated block. The string must be \0-terminated, so
- // data[size] == '\0' and capacity >= size + 1.
+ // is the size of the string, and the parameter "allocatedSize"
+ // is the size of the mallocated block. The string must be
+ // \0-terminated, so allocatedSize >= size + 1 and data[size] == '\0'.
//
- // So if you want a 2-character string, pass malloc(3) as "data", pass 2 as
- // "size", and pass 3 as "capacity".
- fbstring_core(Char *const data, const size_t size,
- const size_t capacity,
+ // So if you want a 2-character string, pass malloc(3) as "data",
+ // pass 2 as "size", and pass 3 as "allocatedSize".
+ fbstring_core(Char * const data,
+ const size_t size,
+ const size_t allocatedSize,
AcquireMallocatedString) {
if (size > 0) {
- assert(capacity > size);
+ assert(allocatedSize >= size + 1);
assert(data[size] == '\0');
// Use the medium string storage
ml_.data_ = data;
ml_.size_ = size;
- ml_.capacity_ = capacity | isMedium;
+ // Don't forget about null terminator
+ ml_.capacity_ = (allocatedSize - 1) | isMedium;
} else {
// No need for the memory
free(data);
smartRealloc(
ml_.data_,
ml_.size_ * sizeof(Char),
- ml_.capacity() * sizeof(Char),
+ (ml_.capacity() + 1) * sizeof(Char),
capacityBytes));
writeTerminator();
ml_.capacity_ = (capacityBytes / sizeof(Char) - 1) | isMedium;
assert(capacity() >= minCapacity);
}
+ // Grows the string by delta characters without initializing them and
+ // returns a pointer to the first character of the expanded region
+ // (i.e. the position at the old size).
- void expand_noinit(const size_t delta) {
+ Char * expand_noinit(const size_t delta) {
// Strategy is simple: make room, then change size
assert(capacity() >= size());
- size_t sz, newSz, cp;
+ size_t sz, newSz;
if (category() == isSmall) {
sz = smallSize();
newSz = sz + delta;
if (newSz <= maxSmallSize) {
setSmallSize(newSz);
writeTerminator();
- return;
+ // Still fits in the in-situ buffer; the new region starts at small_[sz]
+ return small_ + sz;
}
- cp = maxSmallSize;
+ // Does not fit in the small buffer any more; the asserts below rely
+ // on reserve() leaving the string in a medium or large category.
+ reserve(newSz);
} else {
sz = ml_.size_;
- newSz = sz + delta;
- cp = capacity();
+ newSz = ml_.size_ + delta;
+ if (newSz > capacity()) {
+ reserve(newSz);
+ }
}
- if (newSz > cp) reserve(newSz);
assert(capacity() >= newSz);
// Category can't be small - we took care of that above
assert(category() == isMedium || category() == isLarge);
ml_.size_ = newSz;
writeTerminator();
assert(size() == newSz);
+ return ml_.data_ + sz;
}
void push_back(Char c) {
assert(capacity() >= size());
- size_t sz, cp;
+ size_t sz;
if (category() == isSmall) {
sz = smallSize();
if (sz < maxSmallSize) {
writeTerminator();
return;
}
- reserve(maxSmallSize * 3 / 2);
+ reserve(maxSmallSize * 2);
} else {
sz = ml_.size_;
- cp = ml_.capacity();
- if (sz == cp) reserve(cp * 3 / 2);
+ if (sz == capacity()) { // always true for isShared()
+ reserve(1 + sz * 3 / 2); // ensures not shared
+ }
}
+ assert(!isShared());
assert(capacity() >= sz + 1);
// Category can't be small - we took care of that above
assert(category() == isMedium || category() == isLarge);
ml_.size_ = sz + 1;
- mutable_data()[sz] = c;
+ ml_.data_[sz] = c;
writeTerminator();
}
return static_cast<RefCounted*>(
static_cast<void*>(
static_cast<unsigned char*>(static_cast<void*>(p))
- - offsetof(RefCounted, data_)));
+ - sizeof(refCount_)));
}
static size_t refs(Char * p) {
}
};
+#if defined(__GNUC__) && !defined(__clang__)
+# pragma GCC diagnostic pop
+#endif
+
#ifndef _LIBSTDCXX_FBSTRING
/**
* Dummy fbstring core that uses an actual std::string. This doesn't
assert(delta <= size());
backend_.resize(size() - delta);
}
- void expand_noinit(size_t delta) {
+ // Grows the backend string by delta characters and returns a pointer
+ // to the first character of the added region.
+ Char * expand_noinit(size_t delta) {
+ auto const sz = size();
backend_.resize(size() + delta);
+ // basic_string::data() only has a const overload before C++17, so
+ // obtain the writable pointer through operator[] instead.
+ return &backend_[0] + sz;
}
void push_back(Char c) {
backend_.push_back(c);
}
public:
- // 21.3.1 construct/copy/destroy
- explicit basic_fbstring(const A& a = A()) {
+ // C++11 21.4.2 construct/copy/destroy
+ explicit basic_fbstring(const A& a = A()) noexcept {
}
basic_fbstring(const basic_fbstring& str)
}
// Move constructor
- basic_fbstring(basic_fbstring&& goner) : store_(std::move(goner.store_)) {
+ basic_fbstring(basic_fbstring&& goner) noexcept
+ : store_(std::move(goner.store_)) {
}
#ifndef _LIBSTDCXX_FBSTRING
}
basic_fbstring(size_type n, value_type c, const A& a = A()) {
- store_.expand_noinit(n);
- auto const data = store_.mutable_data();
+ auto const data = store_.expand_noinit(n);
fbstring_detail::pod_fill(data, data + n, c);
store_.writeTerminator();
}
: store_(s, n, c, a) {
}
- ~basic_fbstring() {
+ // Construction from an initializer list: the elements of il are
+ // copied into the string via assign().
+ basic_fbstring(std::initializer_list<value_type> il) {
+ assign(il.begin(), il.end());
+ }
+
+ ~basic_fbstring() noexcept {
}
- basic_fbstring& operator=(const basic_fbstring & lhs) {
- if (&lhs == this) {
+ basic_fbstring& operator=(const basic_fbstring& lhs) {
+ if (FBSTRING_UNLIKELY(&lhs == this)) {
return *this;
}
auto const oldSize = size();
}
// Move assignment
- basic_fbstring& operator=(basic_fbstring&& goner) {
+ basic_fbstring& operator=(basic_fbstring&& goner) noexcept {
+ if (FBSTRING_UNLIKELY(&goner == this)) {
+ // Compatibility with std::basic_string<>,
+ // C++11 21.4.2 [string.cons] / 23 requires self-move-assignment support.
+ return *this;
+ }
// No need of this anymore
this->~basic_fbstring();
// Move the goner into this
return *this;
}
- // 21.3.2 iterators:
+ // Assignment from an initializer list: forwards the range
+ // [il.begin(), il.end()) to assign() and returns its result.
+ basic_fbstring& operator=(std::initializer_list<value_type> il) {
+ return assign(il.begin(), il.end());
+ }
+
+ // C++11 21.4.3 iterators:
iterator begin() { return store_.mutable_data(); }
const_iterator begin() const { return store_.data(); }
+ const_iterator cbegin() const { return begin(); }
+
iterator end() {
return store_.mutable_data() + store_.size();
}
return store_.data() + store_.size();
}
+ const_iterator cend() const { return end(); }
+
reverse_iterator rbegin() {
return reverse_iterator(end());
}
return const_reverse_iterator(end());
}
+ const_reverse_iterator crbegin() const { return rbegin(); }
+
reverse_iterator rend() {
return reverse_iterator(begin());
}
return const_reverse_iterator(begin());
}
- // Non-standard functions. They intentionally return by value to
- // reduce pressure on the reference counting mechanism.
- value_type front() const { return *begin(); }
- value_type back() const {
+ const_reverse_iterator crend() const { return rend(); }
+
+ // Added by C++11
+ // C++11 21.4.5, element access:
+ // front()/back() return references to the first/last character.
+ // back() and pop_back() assert that the string is not empty.
+ const value_type& front() const { return *begin(); }
+ const value_type& back() const {
+ assert(!empty());
+ // Should be begin()[size() - 1], but that branches twice
+ return *(end() - 1);
+ }
+ // Mutable overloads: these go through the non-const begin()/end().
+ value_type& front() { return *begin(); }
+ value_type& back() {
+ assert(!empty());
+ // Should be begin()[size() - 1], but that branches twice
+ return *(end() - 1);
+ }
+ // Removes the last character by shrinking the store by one.
+ void pop_back() {
assert(!empty());
- return begin()[size() - 1];
+ store_.shrink(1);
}
- void pop_back() { assert(!empty()); store_.shrink(1); }
- // 21.3.3 capacity:
+ // C++11 21.4.4 capacity:
size_type size() const { return store_.size(); }
size_type length() const { return size(); }
store_.reserve(res_arg);
}
+ void shrink_to_fit() {
+   // Reallocate only when the capacity is at least 1.5x the current
+   // size; with less slack than that the string is left untouched.
+   const bool enoughSlack = !(capacity() < size() * 3 / 2);
+   if (enoughSlack) {
+     // Build a fresh copy of the contents and swap storage with it.
+     basic_fbstring(cbegin(), cend()).swap(*this);
+   }
+ }
+
void clear() { resize(0); }
bool empty() const { return size() == 0; }
- // 21.3.4 element access:
+ // C++11 21.4.5 element access:
const_reference operator[](size_type pos) const {
return *(c_str() + pos);
}
return (*this)[n];
}
- // 21.3.5 modifiers:
+ // C++11 21.4.6 modifiers:
basic_fbstring& operator+=(const basic_fbstring& str) {
return append(str);
}
return *this;
}
+ // Appends the characters of the initializer list il and returns
+ // *this.
+ basic_fbstring& operator+=(std::initializer_list<value_type> il) {
+ append(il);
+ return *this;
+ }
+
basic_fbstring& append(const basic_fbstring& str) {
#ifndef NDEBUG
auto desiredSize = size() + str.size();
return append(str.data() + pos, n);
}
+ // Appends the n characters starting at s and returns *this. s is
+ // allowed to point inside this string (see the aliasing check below).
- basic_fbstring& append(const value_type* s, const size_type n) {
+ basic_fbstring& append(const value_type* s, size_type n) {
#ifndef NDEBUG
- auto oldSize = size();
-#endif
Invariant checker(*this);
(void) checker;
- static std::less_equal<const value_type*> le;
- if (le(data(), s) && !le(data() + size(), s)) {// aliasing
- assert(le(s + n, data() + size()));
- const size_type offset = s - data();
- store_.reserve(size() + n);
+#endif
+ if (FBSTRING_UNLIKELY(!n)) {
+ // Unlikely but must be done
+ return *this;
+ }
+ auto const oldSize = size();
+ auto const oldData = data();
+ // Check for aliasing (rare). We could use "<=" here, but in theory
+ // that does not work for pointers unless they point to elements
+ // of the same array. For that reason we use std::less_equal,
+ // which is guaranteed to offer a total order over pointers. See
+ // discussion at http://goo.gl/Cy2ya for more info.
+ std::less_equal<const value_type*> le;
+ if (FBSTRING_UNLIKELY(le(oldData, s) && !le(oldData + oldSize, s))) {
+ assert(le(s + n, oldData + oldSize));
+ const size_type offset = s - oldData;
+ store_.reserve(oldSize + n);
// Restore the source
s = data() + offset;
}
- store_.expand_noinit(n);
- fbstring_detail::pod_copy(s, s + n, end() - n);
- store_.writeTerminator();
- assert(size() == oldSize + n);
+ // Warning! Repeated appends with short strings may actually incur
+ // practically quadratic performance. Avoid that by pushing back
+ // the first character (which ensures exponential growth) and then
+ // appending the rest normally. Worst case the append may incur a
+ // second allocation but that will be rare.
+ push_back(*s++);
+ // n now counts only the remaining characters, hence the "+ 1" in
+ // the size assert below.
+ --n;
+ memcpy(store_.expand_noinit(n), s, n * sizeof(value_type));
+ assert(size() == oldSize + n + 1);
return *this;
}
return *this;
}
+ // Appends the characters of the initializer list il by forwarding
+ // its [begin, end) range to the iterator-pair append().
+ basic_fbstring& append(std::initializer_list<value_type> il) {
+ return append(il.begin(), il.end());
+ }
+
void push_back(const value_type c) { // primitive
store_.push_back(c);
}
return assign(str.data(), str.size());
}
+ // Assignment from an rvalue string: forwards to the move assignment
+ // operator and returns its result.
+ basic_fbstring& assign(basic_fbstring&& str) {
+ return *this = std::move(str);
+ }
+
basic_fbstring& assign(const basic_fbstring& str, const size_type pos,
size_type n) {
const size_type sz = str.size();
return assign(s, traits_type::length(s));
}
+ // Assignment from an initializer list: forwards its [begin, end)
+ // range to the iterator-pair assign().
+ basic_fbstring& assign(std::initializer_list<value_type> il) {
+ return assign(il.begin(), il.end());
+ }
+
template <class ItOrLength, class ItOrChar>
basic_fbstring& assign(ItOrLength first_or_n, ItOrChar last_or_c) {
return replace(begin(), end(), first_or_n, last_or_c);
return *this;
}
- iterator insert(const iterator p, const value_type c) {
+ iterator insert(const_iterator p, const value_type c) {
const size_type pos = p - begin();
insert(p, 1, c);
return begin() + pos;
private:
template <int i> class Selector {};
- basic_fbstring& insertImplDiscr(iterator p,
- size_type n, value_type c, Selector<1>) {
+ iterator insertImplDiscr(const_iterator p,
+ size_type n, value_type c, Selector<1>) {
Invariant checker(*this);
(void) checker;
+ auto const pos = p - begin();
assert(p >= begin() && p <= end());
if (capacity() - size() < n) {
const size_type sz = p - begin();
p = begin() + sz;
}
const iterator oldEnd = end();
- if( n < size_type(oldEnd - p)) {
+ if (n < size_type(oldEnd - p)) {
append(oldEnd - n, oldEnd);
//std::copy(
// reverse_iterator(oldEnd - n),
// reverse_iterator(p),
// reverse_iterator(oldEnd));
- fbstring_detail::pod_move(&*p, &*oldEnd - n, &*p + n);
- std::fill(p, p + n, c);
+ fbstring_detail::pod_move(&*p, &*oldEnd - n,
+ begin() + pos + n);
+ std::fill(begin() + pos, begin() + pos + n, c);
} else {
append(n - (end() - p), c);
- append(p, oldEnd);
- std::fill(p, oldEnd, c);
+ append(iterator(p), oldEnd);
+ std::fill(iterator(p), oldEnd, c);
}
store_.writeTerminator();
- return *this;
+ return begin() + pos;
}
+ // Iterator-range flavor of insert (tagged Selector<0>): dispatches to
+ // insertImpl on the iterator category of InputIter and returns the
+ // iterator that insertImpl produces.
template<class InputIter>
- basic_fbstring& insertImplDiscr(iterator i,
- InputIter b, InputIter e, Selector<0>) {
- insertImpl(i, b, e,
+ iterator insertImplDiscr(const_iterator i,
+ InputIter b, InputIter e, Selector<0>) {
+ return insertImpl(i, b, e,
typename std::iterator_traits<InputIter>::iterator_category());
- return *this;
}
template <class FwdIterator>
- void insertImpl(iterator i,
+ iterator insertImpl(const_iterator i,
FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) {
Invariant checker(*this);
(void) checker;
const iterator tailBegin = end() - n2;
store_.expand_noinit(n2);
fbstring_detail::pod_copy(tailBegin, tailBegin + n2, end() - n2);
- std::copy(reverse_iterator(tailBegin), reverse_iterator(i),
+ std::copy(const_reverse_iterator(tailBegin), const_reverse_iterator(i),
reverse_iterator(tailBegin + n2));
- std::copy(s1, s2, i);
+ std::copy(s1, s2, begin() + pos);
} else {
FwdIterator t = s1;
const size_type old_size = size();
std::copy(t, s2, begin() + old_size);
fbstring_detail::pod_copy(data() + pos, data() + old_size,
begin() + old_size + newElems);
- std::copy(s1, t, i);
+ std::copy(s1, t, begin() + pos);
}
store_.writeTerminator();
+ return begin() + pos;
}
+ // Insertion from a single-pass (input iterator) range. Builds the
+ // result in a temporary: the prefix [cbegin(), i), then the inserted
+ // elements, then the suffix [i, cend()). The temporary is then swapped
+ // in. Returns an iterator to the insertion position in the new
+ // contents.
template <class InputIterator>
- void insertImpl(iterator i,
- InputIterator b, InputIterator e, std::input_iterator_tag) {
+ iterator insertImpl(const_iterator i,
+ InputIterator b, InputIterator e,
+ std::input_iterator_tag) {
+ // Use cbegin() rather than the non-const begin(): the latter goes
+ // through store_.mutable_data(), which may have to modify the store
+ // and would then leave the const_iterator i pointing into a
+ // different buffer than the one begin() returns.
+ const auto pos = i - cbegin();
- basic_fbstring temp(begin(), i);
+ basic_fbstring temp(cbegin(), i);
for (; b != e; ++b) {
temp.push_back(*b);
}
- temp.append(i, end());
+ temp.append(i, cend());
swap(temp);
+ return begin() + pos;
}
public:
+ // Generic two-argument insert: covers both insert(p, first, last) and
+ // insert(p, n, c). The overload is picked at compile time from
+ // whether std::numeric_limits is specialized for ItOrLength.
template <class ItOrLength, class ItOrChar>
- void insert(iterator p, ItOrLength first_or_n, ItOrChar last_or_c) {
+ iterator insert(const_iterator p, ItOrLength first_or_n, ItOrChar last_or_c) {
Selector<std::numeric_limits<ItOrLength>::is_specialized> sel;
- insertImplDiscr(p, first_or_n, last_or_c, sel);
+ return insertImplDiscr(p, first_or_n, last_or_c, sel);
+ }
+
+ // Inserts the characters of the initializer list il at p.
+ iterator insert(const_iterator p, std::initializer_list<value_type> il) {
+ return insert(p, il.begin(), il.end());
}
basic_fbstring& erase(size_type pos = 0, size_type n = npos) {
store_.swap(rhs.store_);
}
- // 21.3.6 string operations:
const value_type* c_str() const {
return store_.c_str();
}
const size_type nsize) const {
if (!nsize) return pos;
auto const size = this->size();
- if (nsize + pos > size) return npos;
+ // nsize + pos can overflow (eg pos == npos), guard against that by checking
+ // that nsize + pos does not wrap around.
+ if (nsize + pos > size || nsize + pos < pos) return npos;
// Don't use std::search, use a Boyer-Moore-like trick by comparing
// the last characters first
auto const haystack = data();
inline
bool operator==(const basic_fbstring<E, T, A, S>& lhs,
const basic_fbstring<E, T, A, S>& rhs) {
- return lhs.compare(rhs) == 0; }
+ return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; }
template <typename E, class T, class A, class S>
inline
return !(lhs < rhs);
}
-// subclause 21.3.7.8:
+// C++11 21.4.8.8
template <typename E, class T, class A, class S>
void swap(basic_fbstring<E, T, A, S>& lhs, basic_fbstring<E, T, A, S>& rhs) {
lhs.swap(rhs);
} // namespace folly
+#pragma GCC diagnostic pop
+
#ifndef _LIBSTDCXX_FBSTRING
namespace std {
+// std::hash specialization for folly::fbstring.
template <>
struct hash< ::folly::fbstring> {
size_t operator()(const ::folly::fbstring& s) const {
- return ::folly::hash::fnv32(s.c_str());
+ // Hash the explicit (data, size) buffer rather than a C string.
+ return ::folly::hash::fnv32_buf(s.data(), s.size());
}
};
}
#endif // _LIBSTDCXX_FBSTRING
+#undef FBSTRING_DISABLE_ADDRESS_SANITIZER
+#undef throw
+#undef FBSTRING_LIKELY
+#undef FBSTRING_UNLIKELY
+
#endif // FOLLY_BASE_FBSTRING_H_