Add mechanism for caching local and peer addresses in AsyncSSLSocket.

[folly.git] / folly / FBString.h
diff --git a/folly/FBString.h b/folly/FBString.h

index d67c075d9e2214b32f10c2ddfe01070b710ffa80..2acaaf79bf5b924e5ec28958e8eec33459e14bb1 100644 (file)
--- a/folly/FBString.h
+++ b/folly/FBString.h
@@ -1,5 +1,5 @@
  /*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2015 Facebook, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -20,94 +20,71 @@
  #ifndef FOLLY_BASE_FBSTRING_H_
  #define FOLLY_BASE_FBSTRING_H_
  
-/**
-   fbstring's behavior can be configured via two macro definitions, as
-   follows. Normally, fbstring does not write a '\0' at the end of
-   each string whenever it changes the underlying characters. Instead,
-   it lazily writes the '\0' whenever either c_str() or data()
-   called.
-
-   This is standard-compliant behavior and may save costs in some
-   circumstances. However, it may be surprising to some client code
-   because c_str() and data() are const member functions (fbstring
-   uses the "mutable" storage class for its own state).
-
-   In order to appease client code that expects fbstring to be
-   zero-terminated at all times, if the preprocessor symbol
-   FBSTRING_CONSERVATIVE is defined, fbstring does exactly that,
-   i.e. it goes the extra mile to guarantee a '\0' is always planted
-   at the end of its data.
-
-   On the contrary, if the desire is to debug faulty client code that
-   unduly assumes the '\0' is present, fbstring plants a '^' (i.e.,
-   emphatically NOT a zero) at the end of each string if
-   FBSTRING_PERVERSE is defined. (Calling c_str() or data() still
-   writes the '\0', of course.)
-
-   The preprocessor symbols FBSTRING_PERVERSE and
-   FBSTRING_CONSERVATIVE cannot be defined simultaneously. This is
-   enforced during preprocessing.
-*/
-
-//#define FBSTRING_PERVERSE
-//#define FBSTRING_CONSERVATIVE
-
-#ifdef FBSTRING_PERVERSE
-#ifdef FBSTRING_CONSERVATIVE
-#error Cannot define both FBSTRING_PERVERSE and FBSTRING_CONSERVATIVE.
-#endif
-#endif
-
  #include <atomic>
  #include <limits>
  #include <type_traits>
  
-// libc++ doesn't provide this header
-#ifndef _LIBCPP_VERSION
  // This file appears in two locations: inside fbcode and in the
  // libstdc++ source code (when embedding fbstring as std::string).
-// To aid in this schizophrenic use, two macros are defined in
-// c++config.h:
-//   _LIBSTDCXX_FBSTRING - Set inside libstdc++.  This is useful to
-//      gate use inside fbcode v. libstdc++
-#include <bits/c++config.h>
-#endif
-
-#ifdef _GLIBCXX_SYMVER
-#include <ext/hash_set>
-#include <ext/hash_map>
-#endif
-
+// To aid in this schizophrenic use, _LIBSTDCXX_FBSTRING is defined in
+// libstdc++'s c++config.h, to gate use inside fbcode v. libstdc++.
  #ifdef _LIBSTDCXX_FBSTRING
  
  #pragma GCC system_header
  
+// When used as std::string replacement always disable assertions.
+#ifndef NDEBUG
+#define NDEBUG
+#define FOLLY_DEFINED_NDEBUG_FOR_FBSTRING
+#endif // NDEBUG
+
  // Handle the cases where the fbcode version (folly/Malloc.h) is included
  // either before or after this inclusion.
  #ifdef FOLLY_MALLOC_H_
  #undef FOLLY_MALLOC_H_
-#include "basic_fbstring_malloc.h"
+#include "basic_fbstring_malloc.h" // nolint
  #else
-#include "basic_fbstring_malloc.h"
+#include "basic_fbstring_malloc.h" // nolint
  #undef FOLLY_MALLOC_H_
  #endif
  
  #else // !_LIBSTDCXX_FBSTRING
  
+#include <folly/Portability.h>
+
+// libc++ doesn't provide this header, nor does msvc
+#ifdef FOLLY_HAVE_BITS_CXXCONFIG_H
+#include <bits/c++config.h>
+#endif
+
  #include <string>
  #include <cstring>
  #include <cassert>
+#include <algorithm>
  
-#include "folly/Traits.h"
-#include "folly/Malloc.h"
-#include "folly/Hash.h"
+#include <folly/Traits.h>
+#include <folly/Malloc.h>
+#include <folly/Hash.h>
+#include <folly/ScopeGuard.h>
+
+#if FOLLY_HAVE_DEPRECATED_ASSOC
+#ifdef _GLIBCXX_SYMVER
+#include <ext/hash_set>
+#include <ext/hash_map>
+#endif
+#endif
  
  #endif
  
  // We defined these here rather than including Likely.h to avoid
  // redefinition errors when fbstring is imported into libstdc++.
+#if defined(__GNUC__) && __GNUC__ >= 4
  #define FBSTRING_LIKELY(x)   (__builtin_expect((x), 1))
  #define FBSTRING_UNLIKELY(x) (__builtin_expect((x), 0))
+#else
+#define FBSTRING_LIKELY(x)   (x)
+#define FBSTRING_UNLIKELY(x) (x)
+#endif
  
  // Ignore shadowing warnings within this file, so includers can use -Wshadow.
  #pragma GCC diagnostic push
@@ -115,6 +92,7 @@
  
  // FBString cannot use throw when replacing std::string, though it may still
  // use std::__throw_*
+// nolint
  #define throw FOLLY_FBSTRING_MAY_NOT_USE_THROW
  
  #ifdef _LIBSTDCXX_FBSTRING
@@ -129,7 +107,10 @@ namespace folly {
  // has issues when inlining is used, so disable that as well.
  #if defined(__clang__)
  # if __has_feature(address_sanitizer)
-#  if __has_attribute(__no_address_safety_analysis__)
+#  if __has_attribute(__no_sanitize__)
+#   define FBSTRING_DISABLE_ADDRESS_SANITIZER \
+      __attribute__((__no_sanitize__("address"), __noinline__))
+#  elif __has_attribute(__no_address_safety_analysis__)
  #   define FBSTRING_DISABLE_ADDRESS_SANITIZER \
        __attribute__((__no_address_safety_analysis__, __noinline__))
  #  elif __has_attribute(__no_sanitize_address__)
@@ -138,8 +119,6 @@ namespace folly {
  #  endif
  # endif
  #elif defined (__GNUC__) && \
-      (__GNUC__ == 4) && \
-      (__GNUC_MINOR__ >= 8) && \
        __SANITIZE_ADDRESS__
  # define FBSTRING_DISABLE_ADDRESS_SANITIZER \
      __attribute__((__no_address_safety_analysis__, __noinline__))
@@ -283,19 +262,10 @@ private:
  };
  */
  
-/**
- * gcc-4.7 throws what appears to be some false positive uninitialized
- * warnings for the members of the MediumLarge struct.  So, mute them here.
- */
-#if defined(__GNUC__) && !defined(__clang__)
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wuninitialized"
-#endif
-
  /**
   * This is the core of the string. The code should work on 32- and
- * 64-bit architectures and with any Char size. Porting to big endian
- * architectures would require some changes.
+ * 64-bit and both big- and little-endian architectures with any
+ * Char size.
   *
   * The storage is selected as follows (assuming we store one-byte
   * characters on a 64-bit machine): (a) "small" strings between 0 and
@@ -307,49 +277,43 @@ private:
   * reference-counted and copied lazily. the reference count is
   * allocated right before the character array.
   *
- * The discriminator between these three strategies sits in the two
- * most significant bits of the rightmost char of the storage. If
- * neither is set, then the string is small (and its length sits in
- * the lower-order bits of that rightmost character). If the MSb is
- * set, the string is medium width. If the second MSb is set, then the
- * string is large.
+ * The discriminator between these three strategies sits in two
+ * bits of the rightmost char of the storage. If neither is set, then the
+ * string is small (and its length sits in the lower-order bits on
+ * little-endian or the high-order bits on big-endian of that
+ * rightmost character). If the MSb is set, the string is medium width.
+ * If the second MSb is set, then the string is large. On little-endian,
+ * these 2 bits are the 2 MSbs of MediumLarge::capacity_, while on
+ * big-endian, these 2 bits are the 2 LSbs. This keeps both little-endian
+ * and big-endian fbstring_core equivalent with merely different ops used
+ * to extract capacity/category.
   */
  template <class Char> class fbstring_core {
  public:
-  fbstring_core() noexcept {
-    // Only initialize the tag, will set the MSBs (i.e. the small
-    // string size) to zero too
-    ml_.capacity_ = maxSmallSize << (8 * (sizeof(size_t) - sizeof(Char)));
-    // or: setSmallSize(0);
-    writeTerminator();
-    assert(category() == isSmall && size() == 0);
-  }
+  fbstring_core() noexcept { reset(); }
  
    fbstring_core(const fbstring_core & rhs) {
      assert(&rhs != this);
      // Simplest case first: small strings are bitblitted
-    if (rhs.category() == isSmall) {
-      assert(offsetof(MediumLarge, data_) == 0);
-      assert(offsetof(MediumLarge, size_) == sizeof(ml_.data_));
-      assert(offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_));
-      const size_t size = rhs.smallSize();
-      if (size == 0) {
-        ml_.capacity_ = rhs.ml_.capacity_;
-        writeTerminator();
-      } else {
-        // Just write the whole thing, don't look at details. In
-        // particular we need to copy capacity anyway because we want
-        // to set the size (don't forget that the last character,
-        // which stores a short string's length, is shared with the
-        // ml_.capacity field).
-        ml_ = rhs.ml_;
-      }
-      assert(category() == isSmall && this->size() == rhs.size());
-    } else if (rhs.category() == isLarge) {
+    if (rhs.category() == Category::isSmall) {
+      static_assert(offsetof(MediumLarge, data_) == 0,
+          "fbstring layout failure");
+      static_assert(offsetof(MediumLarge, size_) == sizeof(ml_.data_),
+          "fbstring layout failure");
+      static_assert(offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_),
+          "fbstring layout failure");
+      // Just write the whole thing, don't look at details. In
+      // particular we need to copy capacity anyway because we want
+      // to set the size (don't forget that the last character,
+      // which stores a short string's length, is shared with the
+      // ml_.capacity field).
+      ml_ = rhs.ml_;
+      assert(category() == Category::isSmall && this->size() == rhs.size());
+    } else if (rhs.category() == Category::isLarge) {
        // Large strings are just refcounted
        ml_ = rhs.ml_;
        RefCounted::incrementRefs(ml_.data_);
-      assert(category() == isLarge && size() == rhs.size());
+      assert(category() == Category::isLarge && size() == rhs.size());
      } else {
        // Medium strings are copied eagerly. Don't forget to allocate
        // one extra Char for the null terminator.
@@ -363,22 +327,19 @@ public:
        // No need for writeTerminator() here, we copied one extra
        // element just above.
        ml_.size_ = rhs.ml_.size_;
-      ml_.capacity_ = (allocSize / sizeof(Char) - 1) | isMedium;
-      assert(category() == isMedium);
+      ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
+      assert(category() == Category::isMedium);
      }
      assert(size() == rhs.size());
      assert(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0);
    }
  
    fbstring_core(fbstring_core&& goner) noexcept {
-    if (goner.category() == isSmall) {
-      // Just copy, leave the goner in peace
-      new(this) fbstring_core(goner.small_, goner.smallSize());
-    } else {
-      // Take goner's guts
-      ml_ = goner.ml_;
+    // Take goner's guts
+    ml_ = goner.ml_;
+    if (goner.category() != Category::isSmall) {
        // Clean goner's carcass
-      goner.setSmallSize(0);
+      goner.reset();
      }
    }
  
@@ -387,13 +348,25 @@ public:
    // so just disable it on this function.
    fbstring_core(const Char *const data, const size_t size)
        FBSTRING_DISABLE_ADDRESS_SANITIZER {
+#ifndef NDEBUG
+#ifndef _LIBSTDCXX_FBSTRING
+    SCOPE_EXIT {
+      assert(this->size() == size);
+      assert(memcmp(this->data(), data, size * sizeof(Char)) == 0);
+    };
+#endif
+#endif
+
      // Simplest case first: small strings are bitblitted
      if (size <= maxSmallSize) {
        // Layout is: Char* data_, size_t size_, size_t capacity_
-      /*static_*/assert(sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t));
-      /*static_*/assert(sizeof(Char*) == sizeof(size_t));
+      static_assert(sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t),
+          "fbstring has unexpected size");
+      static_assert(sizeof(Char*) == sizeof(size_t),
+          "fbstring size assumption violation");
        // sizeof(size_t) must be a power of 2
-      /*static_*/assert((sizeof(size_t) & (sizeof(size_t) - 1)) == 0);
+      static_assert((sizeof(size_t) & (sizeof(size_t) - 1)) == 0,
+          "fbstring size assumption violation");
  
        // If data is aligned, use fast word-wise copying. Otherwise,
        // use conservative memcpy.
@@ -418,6 +391,7 @@ public:
          }
        }
        setSmallSize(size);
+      return;
      } else if (size <= maxMediumSize) {
        // Medium strings are allocated normally. Don't forget to
        // allocate one extra Char for the terminating null.
@@ -425,26 +399,24 @@ public:
        ml_.data_ = static_cast<Char*>(checkedMalloc(allocSize));
        fbstring_detail::pod_copy(data, data + size, ml_.data_);
        ml_.size_ = size;
-      ml_.capacity_ = (allocSize / sizeof(Char) - 1) | isMedium;
+      ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
      } else {
        // Large strings are allocated differently
        size_t effectiveCapacity = size;
        auto const newRC = RefCounted::create(data, & effectiveCapacity);
        ml_.data_ = newRC->data_;
        ml_.size_ = size;
-      ml_.capacity_ = effectiveCapacity | isLarge;
+      ml_.setCapacity(effectiveCapacity, Category::isLarge);
      }
      writeTerminator();
-    assert(this->size() == size);
-    assert(memcmp(this->data(), data, size * sizeof(Char)) == 0);
    }
  
    ~fbstring_core() noexcept {
      auto const c = category();
-    if (c == isSmall) {
+    if (c == Category::isSmall) {
        return;
      }
-    if (c == isMedium) {
+    if (c == Category::isMedium) {
        free(ml_.data_);
        return;
      }
@@ -469,11 +441,11 @@ public:
        ml_.data_ = data;
        ml_.size_ = size;
        // Don't forget about null terminator
-      ml_.capacity_ = (allocatedSize - 1) | isMedium;
+      ml_.setCapacity(allocatedSize - 1, Category::isMedium);
      } else {
        // No need for the memory
        free(data);
-      setSmallSize(0);
+      reset();
      }
    }
  
@@ -494,11 +466,11 @@ public:
  
    Char * mutable_data() {
      auto const c = category();
-    if (c == isSmall) {
+    if (c == Category::isSmall) {
        return small_;
      }
-    assert(c == isMedium || c == isLarge);
-    if (c == isLarge && RefCounted::refs(ml_.data_) > 1) {
+    assert(c == Category::isMedium || c == Category::isLarge);
+    if (c == Category::isLarge && RefCounted::refs(ml_.data_) > 1) {
        // Ensure unique.
        size_t effectiveCapacity = ml_.capacity();
        auto const newRC = RefCounted::create(& effectiveCapacity);
@@ -516,44 +488,27 @@ public:
  
    const Char * c_str() const {
      auto const c = category();
-#ifdef FBSTRING_PERVERSE
-    if (c == isSmall) {
-      assert(small_[smallSize()] == TERMINATOR || smallSize() == maxSmallSize
-             || small_[smallSize()] == '\0');
-      small_[smallSize()] = '\0';
-      return small_;
-    }
-    assert(c == isMedium || c == isLarge);
-    assert(ml_.data_[ml_.size_] == TERMINATOR || ml_.data_[ml_.size_] == '\0');
-    ml_.data_[ml_.size_] = '\0';
-#elif defined(FBSTRING_CONSERVATIVE)
-    if (c == isSmall) {
+    if (c == Category::isSmall) {
        assert(small_[smallSize()] == '\0');
        return small_;
      }
-    assert(c == isMedium || c == isLarge);
+    assert(c == Category::isMedium || c == Category::isLarge);
      assert(ml_.data_[ml_.size_] == '\0');
-#else
-    if (c == isSmall) {
-      small_[smallSize()] = '\0';
-      return small_;
-    }
-    assert(c == isMedium || c == isLarge);
-    ml_.data_[ml_.size_] = '\0';
-#endif
      return ml_.data_;
    }
  
    void shrink(const size_t delta) {
-    if (category() == isSmall) {
+    if (category() == Category::isSmall) {
        // Check for underflow
        assert(delta <= smallSize());
        setSmallSize(smallSize() - delta);
-    } else if (category() == isMedium || RefCounted::refs(ml_.data_) == 1) {
+    } else if (category() == Category::isMedium ||
+               RefCounted::refs(ml_.data_) == 1) {
        // Medium strings and unique large strings need no special
        // handling.
        assert(ml_.size_ >= delta);
        ml_.size_ -= delta;
+      writeTerminator();
      } else {
        assert(ml_.size_ >= delta);
        // Shared large string, must make unique. This is because of the
@@ -563,13 +518,11 @@ public:
          fbstring_core(ml_.data_, ml_.size_ - delta).swap(*this);
        }
        // No need to write the terminator.
-      return;
      }
-    writeTerminator();
    }
  
    void reserve(size_t minCapacity) {
-    if (category() == isLarge) {
+    if (category() == Category::isLarge) {
        // Ensure unique
        if (RefCounted::refs(ml_.data_) > 1) {
          // We must make it unique regardless; in-place reallocation is
@@ -585,7 +538,7 @@ public:
          // we have + 1 above.
          RefCounted::decrementRefs(ml_.data_);
          ml_.data_ = newRC->data_;
-        ml_.capacity_ = minCapacity | isLarge;
+        ml_.setCapacity(minCapacity, Category::isLarge);
          // size remains unchanged
        } else {
          // String is not shared, so let's try to realloc (if needed)
@@ -595,12 +548,12 @@ public:
                 RefCounted::reallocate(ml_.data_, ml_.size_,
                                        ml_.capacity(), minCapacity);
            ml_.data_ = newRC->data_;
-          ml_.capacity_ = minCapacity | isLarge;
+          ml_.setCapacity(minCapacity, Category::isLarge);
            writeTerminator();
          }
          assert(capacity() >= minCapacity);
        }
-    } else if (category() == isMedium) {
+    } else if (category() == Category::isMedium) {
        // String is not shared
        if (minCapacity <= ml_.capacity()) {
          return; // nothing to do, there's enough room
@@ -616,7 +569,7 @@ public:
              (ml_.capacity() + 1) * sizeof(Char),
              capacityBytes));
          writeTerminator();
-        ml_.capacity_ = (capacityBytes / sizeof(Char) - 1) | isMedium;
+        ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium);
        } else {
          // Conversion from medium to large string
          fbstring_core nascent;
@@ -630,7 +583,7 @@ public:
          assert(capacity() >= minCapacity);
        }
      } else {
-      assert(category() == isSmall);
+      assert(category() == Category::isSmall);
        if (minCapacity > maxMediumSize) {
          // large
          auto const newRC = RefCounted::create(& minCapacity);
@@ -639,7 +592,7 @@ public:
          // No need for writeTerminator(), we wrote it above with + 1.
          ml_.data_ = newRC->data_;
          ml_.size_ = size;
-        ml_.capacity_ = minCapacity | isLarge;
+        ml_.setCapacity(minCapacity, Category::isLarge);
          assert(capacity() >= minCapacity);
        } else if (minCapacity > maxSmallSize) {
          // medium
@@ -652,7 +605,7 @@ public:
          // No need for writeTerminator(), we wrote it above with + 1.
          ml_.data_ = data;
          ml_.size_ = size;
-        ml_.capacity_ = (allocSizeBytes / sizeof(Char) - 1) | isMedium;
+        ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium);
        } else {
          // small
          // Nothing to do, everything stays put
@@ -665,12 +618,11 @@ public:
      // Strategy is simple: make room, then change size
      assert(capacity() >= size());
      size_t sz, newSz;
-    if (category() == isSmall) {
+    if (category() == Category::isSmall) {
        sz = smallSize();
        newSz = sz + delta;
        if (newSz <= maxSmallSize) {
          setSmallSize(newSz);
-        writeTerminator();
          return small_ + sz;
        }
        reserve(newSz);
@@ -683,7 +635,7 @@ public:
      }
      assert(capacity() >= newSz);
      // Category can't be small - we took care of that above
-    assert(category() == isMedium || category() == isLarge);
+    assert(category() == Category::isMedium || category() == Category::isLarge);
      ml_.size_ = newSz;
      writeTerminator();
      assert(size() == newSz);
@@ -693,12 +645,11 @@ public:
    void push_back(Char c) {
      assert(capacity() >= size());
      size_t sz;
-    if (category() == isSmall) {
+    if (category() == Category::isSmall) {
        sz = smallSize();
        if (sz < maxSmallSize) {
-        setSmallSize(sz + 1);
          small_[sz] = c;
-        writeTerminator();
+        setSmallSize(sz + 1);
          return;
        }
        reserve(maxSmallSize * 2);
@@ -711,21 +662,21 @@ public:
      assert(!isShared());
      assert(capacity() >= sz + 1);
      // Category can't be small - we took care of that above
-    assert(category() == isMedium || category() == isLarge);
+    assert(category() == Category::isMedium || category() == Category::isLarge);
      ml_.size_ = sz + 1;
      ml_.data_[sz] = c;
      writeTerminator();
    }
  
    size_t size() const {
-    return category() == isSmall ? smallSize() : ml_.size_;
+    return category() == Category::isSmall ? smallSize() : ml_.size_;
    }
  
    size_t capacity() const {
      switch (category()) {
-      case isSmall:
+      case Category::isSmall:
          return maxSmallSize;
-      case isLarge:
+      case Category::isLarge:
          // For large-sized strings, a multi-referenced chunk has no
          // available capacity. This is because any attempt to append
          // data would trigger a new allocation.
@@ -736,41 +687,36 @@ public:
    }
  
    bool isShared() const {
-    return category() == isLarge && RefCounted::refs(ml_.data_) > 1;
+    return category() == Category::isLarge && RefCounted::refs(ml_.data_) > 1;
    }
  
-#ifdef FBSTRING_PERVERSE
-  enum { TERMINATOR = '^' };
-#else
-  enum { TERMINATOR = '\0' };
-#endif
-
    void writeTerminator() {
-#if defined(FBSTRING_PERVERSE) || defined(FBSTRING_CONSERVATIVE)
-    if (category() == isSmall) {
+    if (category() == Category::isSmall) {
        const auto s = smallSize();
        if (s != maxSmallSize) {
-        small_[s] = TERMINATOR;
+        small_[s] = '\0';
        }
      } else {
-      ml_.data_[ml_.size_] = TERMINATOR;
+      ml_.data_[ml_.size_] = '\0';
      }
-#endif
    }
  
  private:
    // Disabled
    fbstring_core & operator=(const fbstring_core & rhs);
  
-  struct MediumLarge {
-    Char * data_;
-    size_t size_;
-    size_t capacity_;
-
-    size_t capacity() const {
-      return capacity_ & capacityExtractMask;
-    }
-  };
+  // Equivalent to setSmallSize(0), but with specialized
+  // writeTerminator which doesn't re-check the category after
+  // capacity_ is overwritten.
+  void reset() {
+    // Only initialize the tag, will set the MSBs (i.e. the small
+    // string size) to zero too.
+    ml_.capacity_ = kIsLittleEndian
+      ? maxSmallSize << (8 * (sizeof(size_t) - sizeof(Char)))
+      : maxSmallSize << 2;
+    small_[0] = '\0';
+    assert(category() == Category::isSmall && size() == 0);
+  }
  
    struct RefCounted {
      std::atomic<size_t> refCount_;
@@ -839,37 +785,68 @@ private:
      }
    };
  
+  typedef std::conditional<sizeof(size_t) == 4, uint32_t, uint64_t>::type
+          category_type;
+
+  enum class Category : category_type {
+    isSmall = 0,
+    isMedium = kIsLittleEndian
+      ? sizeof(size_t) == 4 ? 0x80000000 : 0x8000000000000000
+      : 0x2,
+    isLarge =  kIsLittleEndian
+      ? sizeof(size_t) == 4 ? 0x40000000 : 0x4000000000000000
+      : 0x1,
+  };
+
+  Category category() const {
+    // works for both big-endian and little-endian
+    return static_cast<Category>(ml_.capacity_ & categoryExtractMask);
+  }
+
+  struct MediumLarge {
+    Char * data_;
+    size_t size_;
+    size_t capacity_;
+
+    size_t capacity() const {
+      return kIsLittleEndian
+        ? capacity_ & capacityExtractMask
+        : capacity_ >> 2;
+    }
+
+    void setCapacity(size_t cap, Category cat) {
+        capacity_ = kIsLittleEndian
+          ? cap | static_cast<category_type>(cat)
+          : (cap << 2) | static_cast<category_type>(cat);
+    }
+  };
+
    union {
-    mutable Char small_[sizeof(MediumLarge) / sizeof(Char)];
-    mutable MediumLarge ml_;
+    Char small_[sizeof(MediumLarge) / sizeof(Char)];
+    MediumLarge ml_;
    };
  
-  enum {
+  enum : size_t {
      lastChar = sizeof(MediumLarge) - 1,
      maxSmallSize = lastChar / sizeof(Char),
      maxMediumSize = 254 / sizeof(Char),            // coincides with the small
                                                     // bin size in dlmalloc
-    categoryExtractMask = sizeof(size_t) == 4 ? 0xC0000000 : 0xC000000000000000,
-    capacityExtractMask = ~categoryExtractMask,
+    categoryExtractMask = kIsLittleEndian
+      ? sizeof(size_t) == 4 ? 0xC0000000 : 0xC000000000000000
+      : 0x3,
+    capacityExtractMask = kIsLittleEndian
+      ? ~categoryExtractMask
+      : 0x0 /*unused*/,
    };
    static_assert(!(sizeof(MediumLarge) % sizeof(Char)),
                  "Corrupt memory layout for fbstring.");
  
-  enum Category {
-    isSmall = 0,
-    isMedium = sizeof(size_t) == 4 ? 0x80000000 : 0x8000000000000000,
-    isLarge =  sizeof(size_t) == 4 ? 0x40000000 : 0x4000000000000000,
-  };
-
-  Category category() const {
-    // Assumes little endian
-    return static_cast<Category>(ml_.capacity_ & categoryExtractMask);
-  }
-
    size_t smallSize() const {
-    assert(category() == isSmall && small_[maxSmallSize] <= maxSmallSize);
-    return static_cast<size_t>(maxSmallSize)
-      - static_cast<size_t>(small_[maxSmallSize]);
+    assert(category() == Category::isSmall);
+    auto shift = kIsLittleEndian ? 0 : 2;
+    auto smallShifted = static_cast<size_t>(small_[maxSmallSize]) >> shift;
+    assert(static_cast<size_t>(maxSmallSize) >= smallShifted);
+    return static_cast<size_t>(maxSmallSize) - smallShifted;
    }
  
    void setSmallSize(size_t s) {
@@ -877,14 +854,13 @@ private:
      // so don't assume anything about the previous value of
      // small_[maxSmallSize].
      assert(s <= maxSmallSize);
-    small_[maxSmallSize] = maxSmallSize - s;
+    small_[maxSmallSize] = kIsLittleEndian
+      ? maxSmallSize - s
+      : (maxSmallSize - s) << 2;
+    writeTerminator();
    }
  };
  
-#if defined(__GNUC__) && !defined(__clang__)
-# pragma GCC diagnostic pop
-#endif
-
  #ifndef _LIBSTDCXX_FBSTRING
  /**
   * Dummy fbstring core that uses an actual std::string. This doesn't
@@ -971,7 +947,7 @@ class basic_fbstring {
        size() <= max_size() &&
        capacity() <= max_size() &&
        size() <= capacity() &&
-      (begin()[size()] == Storage::TERMINATOR || begin()[size()] == '\0');
+      begin()[size()] == '\0';
    }
  
    struct Invariant;
@@ -1027,7 +1003,23 @@ private:
  
  public:
    // C++11 21.4.2 construct/copy/destroy
-  explicit basic_fbstring(const A& a = A()) noexcept {
+
+  // Note: while the following two constructors can be (and previously were)
+  // collapsed into one constructor written this way:
+  //
+  //   explicit basic_fbstring(const A& a = A()) noexcept { }
+  //
+  // This can cause Clang (at least version 3.7) to fail with the error:
+  //   "chosen constructor is explicit in copy-initialization ...
+  //   in implicit initialization of field '(x)' with omitted initializer"
+  //
+  // if used in a struct which is default-initialized.  Hence the split into
+  // these two separate constructors.
+
+  basic_fbstring() noexcept : basic_fbstring(A()) {
+  }
+
+  explicit basic_fbstring(const A&) noexcept {
    }
  
    basic_fbstring(const basic_fbstring& str)
@@ -1051,20 +1043,19 @@ public:
      assign(str, pos, n);
    }
  
-  /* implicit */ basic_fbstring(const value_type* s, const A& a = A())
-      : store_(s, s ? traits_type::length(s) : ({
-          basic_fbstring<char> err = __PRETTY_FUNCTION__;
-          err += ": null pointer initializer not valid";
-          std::__throw_logic_error(err.c_str());
-          0;
-      })) {
+  /* implicit */ basic_fbstring(const value_type* s, const A& /*a*/ = A())
+      : store_(s, s
+          ? traits_type::length(s)
+          : (std::__throw_logic_error(
+                "basic_fbstring: null pointer initializer not valid"),
+             0)) {
    }
  
-  basic_fbstring(const value_type* s, size_type n, const A& a = A())
+  basic_fbstring(const value_type* s, size_type n, const A& /*a*/ = A())
        : store_(s, n) {
    }
  
-  basic_fbstring(size_type n, value_type c, const A& a = A()) {
+  basic_fbstring(size_type n, value_type c, const A& /*a*/ = A()) {
      auto const data = store_.expand_noinit(n);
      fbstring_detail::pod_fill(data, data + n, c);
      store_.writeTerminator();
@@ -1074,7 +1065,7 @@ public:
    basic_fbstring(InIt begin, InIt end,
                   typename std::enable_if<
                   !std::is_same<typename std::remove_const<InIt>::type,
-                 value_type*>::value, const A>::type & a = A()) {
+                 value_type*>::value, const A>::type & /*a*/ = A()) {
      assign(begin, end);
    }
  
@@ -1286,14 +1277,10 @@ public:
  
    // C++11 21.4.5 element access:
    const_reference operator[](size_type pos) const {
-    return *(c_str() + pos);
+    return *(begin() + pos);
    }
  
    reference operator[](size_type pos) {
-    if (pos == size()) {
-      // Just call c_str() to make sure '\0' is present
-      c_str();
-    }
      return *(begin() + pos);
    }
  
@@ -2002,11 +1989,18 @@ public:
      return find_last_not_of(&c, pos, 1);
    }
  
-  basic_fbstring substr(size_type pos = 0, size_type n = npos) const {
+  basic_fbstring substr(size_type pos = 0, size_type n = npos) const& {
      enforce(pos <= size(), std::__throw_out_of_range, "");
      return basic_fbstring(data() + pos, std::min(n, size() - pos));
    }
  
+  basic_fbstring substr(size_type pos = 0, size_type n = npos) && {
+    enforce(pos <= size(), std::__throw_out_of_range, "");
+    erase(0, pos);
+    if (n < size()) resize(n);
+    return std::move(*this);
+  }
+
    int compare(const basic_fbstring& str) const {
      // FIX due to Goncalo N M de Carvalho July 18, 2005
      return compare(0, size(), str);
@@ -2054,7 +2048,7 @@ private:
  };
  
  // non-member functions
-// C++11 21.4.8.1/2
+// C++11 21.4.8.1/1
  template <typename E, class T, class A, class S>
  inline
  basic_fbstring<E, T, A, S> operator+(const basic_fbstring<E, T, A, S>& lhs,
@@ -2096,24 +2090,45 @@ basic_fbstring<E, T, A, S> operator+(basic_fbstring<E, T, A, S>&& lhs,
    return std::move(lhs.append(rhs));
  }
  
+// C++11 21.4.8.1/5
  template <typename E, class T, class A, class S>
  inline
  basic_fbstring<E, T, A, S> operator+(
-  const typename basic_fbstring<E, T, A, S>::value_type* lhs,
+  const E* lhs,
    const basic_fbstring<E, T, A, S>& rhs) {
    //
    basic_fbstring<E, T, A, S> result;
-  const typename basic_fbstring<E, T, A, S>::size_type len =
-    basic_fbstring<E, T, A, S>::traits_type::length(lhs);
+  const auto len = basic_fbstring<E, T, A, S>::traits_type::length(lhs);
    result.reserve(len + rhs.size());
    result.append(lhs, len).append(rhs);
    return result;
  }
  
+// C++11 21.4.8.1/6
  template <typename E, class T, class A, class S>
  inline
  basic_fbstring<E, T, A, S> operator+(
-  typename basic_fbstring<E, T, A, S>::value_type lhs,
+  const E* lhs,
+  basic_fbstring<E, T, A, S>&& rhs) {
+  //
+  const auto len = basic_fbstring<E, T, A, S>::traits_type::length(lhs);
+  if (rhs.capacity() >= len + rhs.size()) {
+    // Good, at least we don't need to reallocate
+    rhs.insert(rhs.begin(), lhs, lhs + len);
+    return rhs;
+  }
+  // Meh, no go. Do it by hand since we have len already.
+  basic_fbstring<E, T, A, S> result;
+  result.reserve(len + rhs.size());
+  result.append(lhs, len).append(rhs);
+  return result;
+}
+
+// C++11 21.4.8.1/7
+template <typename E, class T, class A, class S>
+inline
+basic_fbstring<E, T, A, S> operator+(
+  E lhs,
    const basic_fbstring<E, T, A, S>& rhs) {
  
    basic_fbstring<E, T, A, S> result;
@@ -2123,11 +2138,29 @@ basic_fbstring<E, T, A, S> operator+(
    return result;
  }
  
+// C++11 21.4.8.1/8
+// Concatenates the single character lhs in front of the expiring string rhs.
+// If rhs has spare capacity for one more character, lhs is inserted at the
+// front of rhs and rhs is moved into the result; otherwise the work is
+// delegated to the operator+(E, const basic_fbstring&) overload.
+template <typename E, class T, class A, class S>
+inline
+basic_fbstring<E, T, A, S> operator+(
+  E lhs,
+  basic_fbstring<E, T, A, S>&& rhs) {
+  //
+  if (rhs.capacity() > rhs.size()) {
+    // Good, at least we don't need to reallocate
+    rhs.insert(rhs.begin(), lhs);
+    // rhs is a named rvalue reference and therefore an lvalue here; a plain
+    // `return rhs;` would copy-construct the result (before C++20), which
+    // defeats the point of this branch. Move it out explicitly.
+    return std::move(rhs);
+  }
+  // Meh, no go. Forward to operator+(E, const&).
+  // Binding to a const reference keeps overload resolution away from this
+  // rvalue overload.
+  auto const& rhsC = rhs;
+  return lhs + rhsC;
+}
+
+// C++11 21.4.8.1/9
  template <typename E, class T, class A, class S>
  inline
  basic_fbstring<E, T, A, S> operator+(
    const basic_fbstring<E, T, A, S>& lhs,
-  const typename basic_fbstring<E, T, A, S>::value_type* rhs) {
+  const E* rhs) {
  
    typedef typename basic_fbstring<E, T, A, S>::size_type size_type;
    typedef typename basic_fbstring<E, T, A, S>::traits_type traits_type;
@@ -2139,11 +2172,22 @@ basic_fbstring<E, T, A, S> operator+(
    return result;
  }
  
+// C++11 21.4.8.1/10
+template <typename E, class T, class A, class S>
+inline
+basic_fbstring<E, T, A, S> operator+(
+  basic_fbstring<E, T, A, S>&& lhs,
+  const E* rhs) {
+  // Append the C string to the expiring lhs in place, then move lhs out.
+  lhs += rhs;
+  return std::move(lhs);
+}
+
+// C++11 21.4.8.1/11
  template <typename E, class T, class A, class S>
  inline
  basic_fbstring<E, T, A, S> operator+(
    const basic_fbstring<E, T, A, S>& lhs,
-  typename basic_fbstring<E, T, A, S>::value_type rhs) {
+  E rhs) {
  
    basic_fbstring<E, T, A, S> result;
    result.reserve(lhs.size() + 1);
@@ -2152,6 +2196,16 @@ basic_fbstring<E, T, A, S> operator+(
    return result;
  }
  
+// C++11 21.4.8.1/12
+template <typename E, class T, class A, class S>
+inline
+basic_fbstring<E, T, A, S> operator+(
+  basic_fbstring<E, T, A, S>&& lhs,
+  E rhs) {
+  // Append the character to the expiring lhs in place, then move lhs out.
+  lhs += rhs;
+  return std::move(lhs);
+}
+
  template <typename E, class T, class A, class S>
  inline
  bool operator==(const basic_fbstring<E, T, A, S>& lhs,
@@ -2286,20 +2340,20 @@ std::basic_istream<
    auto err = __ios_base::goodbit;
    if (sentry) {
      auto n = is.width();
-    if (n == 0) {
+    if (n <= 0) {
        n = str.max_size();
      }
      str.erase();
-    auto got = is.rdbuf()->sgetc();
-    for (; extracted != n && got != T::eof() && !isspace(got); ++extracted) {
-      // Whew. We get to store this guy
+    for (auto got = is.rdbuf()->sgetc(); extracted != size_t(n); ++extracted) {
+      if (got == T::eof()) {
+        err |= __ios_base::eofbit;
+        is.width(0);
+        break;
+      }
+      if (isspace(got)) break;
        str.push_back(got);
        got = is.rdbuf()->snextc();
      }
-    if (got == T::eof()) {
-      err |= __ios_base::eofbit;
-      is.width(0);
-    }
    }
    if (!extracted) {
      err |= __ios_base::failbit;
@@ -2338,6 +2392,9 @@ operator<<(
        os.setstate(std::ios_base::badbit | std::ios_base::failbit);
      }
    }
+#elif defined(_MSC_VER)
+  // MSVC doesn't define __ostream_insert
+  os.write(str.data(), str.size());
  #else
    std::__ostream_insert(os, str.data(), str.size());
  #endif
@@ -2356,7 +2413,7 @@ getline(
      basic_fbstring<E, T, A, S>& str,
    typename basic_fbstring<E, T, A, S>::value_type delim) {
    // Use the nonstandard getdelim()
-  char * buf = NULL;
+  char * buf = nullptr;
    size_t size = 0;
    for (;;) {
      // This looks quadratic but it really depends on realloc
@@ -2451,43 +2508,41 @@ _GLIBCXX_END_NAMESPACE_VERSION
  //
  // Handle interaction with different C++ standard libraries, which
  // expect these types to be in different namespaces.
-namespace std {
  
-template <class C>
-struct hash<folly::basic_fbstring<C> > : private hash<const C*> {
-  size_t operator()(const folly::basic_fbstring<C> & s) const {
-    return hash<const C*>::operator()(s.c_str());
-  }
-};
+// Expands to a full specialization of hash<> for ::folly::basic_fbstring<T>
+// in whichever namespace the macro is used. The call operator must accept
+// basic_fbstring<T> itself (not always the char-based fbstring), and the
+// length handed to fnv32_buf is scaled by sizeof(T) because size() counts
+// characters while the buffer is hashed as raw bytes.
+#define FOLLY_FBSTRING_HASH1(T) \
+  template <> \
+  struct hash< ::folly::basic_fbstring<T> > { \
+    size_t operator()(const ::folly::basic_fbstring<T>& s) const { \
+      return ::folly::hash::fnv32_buf(s.data(), s.size() * sizeof(T)); \
+    } \
+  };
  
-template <>
-struct hash< ::folly::fbstring> {
-  size_t operator()(const ::folly::fbstring& s) const {
-    return ::folly::hash::fnv32_buf(s.data(), s.size());
-  }
-};
+// The C++11 standard says that hash specializations are defined for strings
+// of these four character types; instantiate FOLLY_FBSTRING_HASH1 once for
+// each of them.
+#define FOLLY_FBSTRING_HASH \
+  FOLLY_FBSTRING_HASH1(char) \
+  FOLLY_FBSTRING_HASH1(char16_t) \
+  FOLLY_FBSTRING_HASH1(char32_t) \
+  FOLLY_FBSTRING_HASH1(wchar_t)
  
-}
+namespace std {
+
+FOLLY_FBSTRING_HASH
  
+}  // namespace std
+
+#if FOLLY_HAVE_DEPRECATED_ASSOC
  #if defined(_GLIBCXX_SYMVER) && !defined(__BIONIC__)
  namespace __gnu_cxx {
  
-template <class C>
-struct hash<folly::basic_fbstring<C> > : private hash<const C*> {
-  size_t operator()(const folly::basic_fbstring<C> & s) const {
-    return hash<const C*>::operator()(s.c_str());
-  }
-};
-
-template <>
-struct hash< ::folly::fbstring> {
-  size_t operator()(const ::folly::fbstring& s) const {
-    return ::folly::hash::fnv32_buf(s.data(), s.size());
-  }
-};
+FOLLY_FBSTRING_HASH
  
-}
+}  // namespace __gnu_cxx
  #endif // _GLIBCXX_SYMVER && !__BIONIC__
+#endif // FOLLY_HAVE_DEPRECATED_ASSOC
+
+#undef FOLLY_FBSTRING_HASH
+#undef FOLLY_FBSTRING_HASH1
+
  #endif // _LIBSTDCXX_FBSTRING
  
  #pragma GCC diagnostic pop
@@ -2497,4 +2552,9 @@ struct hash< ::folly::fbstring> {
  #undef FBSTRING_LIKELY
  #undef FBSTRING_UNLIKELY
  
+#ifdef FOLLY_DEFINED_NDEBUG_FOR_FBSTRING
+#undef NDEBUG
+#undef FOLLY_DEFINED_NDEBUG_FOR_FBSTRING
+#endif // FOLLY_DEFINED_NDEBUG_FOR_FBSTRING
+
  #endif // FOLLY_BASE_FBSTRING_H_