From 27494a20393fa45072e7d526d358835f3abe312a Mon Sep 17 00:00:00 2001
From: Jordan DeLong
Date: Sat, 2 Jun 2012 11:19:29 -0700
Subject: [PATCH] Pull from FB rev 63ce89e2f2301e6bba44a111cc7d4218022156f6

---
 folly/Arena-inl.h | 84 +
 folly/Arena.h | 227 ++
 folly/AtomicHashArray-inl.h | 366 +++
 folly/AtomicHashArray.h | 283 ++
 folly/AtomicHashMap-inl.h | 402 +++
 folly/AtomicHashMap.h | 393 +++
 folly/Benchmark.cpp | 393 +++
 folly/Benchmark.h | 375 +++
 folly/Bits.h | 519 ++++
 folly/ConcurrentSkipList-inl.h | 220 ++
 folly/ConcurrentSkipList.h | 852 ++++++
 folly/Conv.cpp | 136 +
 folly/Conv.h | 844 ++++++
 folly/DiscriminatedPtr.h | 221 ++
 folly/FBString.h | 2284 +++++++++++++++++
 folly/FBVector.h | 936 +++++++
 folly/Foreach.h | 260 ++
 folly/Format-inl.h | 1078 ++++++++
 folly/Format.cpp | 137 +
 folly/Format.h | 272 ++
 folly/FormatArg.h | 255 ++
 folly/GroupVarint.cpp | 32 +
 folly/GroupVarint.h | 600 +++++
 folly/Hash.h | 243 ++
 folly/Histogram-inl.h | 256 ++
 folly/Histogram.h | 381 +++
 folly/IntrusiveList.h | 135 +
 folly/Likely.h | 39 +
 folly/Makefile.am | 86 +
 folly/Malloc.h | 192 ++
 folly/MapUtil.h | 72 +
 folly/PackedSyncPtr.h | 150 ++
 folly/Portability.h | 30 +
 folly/Preprocessor.h | 70 +
 folly/ProducerConsumerQueue.h | 128 +
 folly/README | 73 +
 folly/RWSpinLock.h | 735 ++++++
 folly/Random.cpp | 35 +
 folly/Random.h | 31 +
 folly/Range.cpp | 37 +
 folly/Range.h | 552 ++++
 folly/SConstruct.double-conversion | 21 +
 folly/ScopeGuard.h | 157 ++
 folly/SmallLocks.h | 284 ++
 folly/StlAllocator.h | 126 +
 folly/String-inl.h | 323 +++
 folly/String.cpp | 313 +++
 folly/String.h | 292 +++
 folly/Synchronized.h | 653 +++++
 folly/ThreadCachedArena.cpp | 43 +
 folly/ThreadCachedArena.h | 81 +
 folly/ThreadCachedInt.h | 176 ++
 folly/ThreadLocal.h | 346 +++
 folly/TimeoutQueue.cpp | 76 +
 folly/TimeoutQueue.h | 132 +
 folly/Traits.h | 236 ++
 folly/Unicode.cpp | 54 +
 folly/Unicode.h | 39 +
 folly/build/generate_escape_tables.py | 91 +
 folly/build/generate_format_tables.py | 79 +
 folly/build/generate_varint_tables.py | 106 +
 folly/configure.ac | 92 +
 folly/detail/AtomicHashUtils.h | 37 +
 folly/detail/BitIteratorDetail.h | 93 +
 folly/detail/DiscriminatedPtrDetail.h | 165 ++
 folly/detail/GroupVarintDetail.h | 104 +
 folly/detail/ThreadLocalDetail.h | 321 +++
 folly/docs/.gitignore | 1 +
 folly/docs/AtomicHashMap.md | 134 +
 folly/docs/Benchmark.md | 281 ++
 folly/docs/Conv.md | 217 ++
 folly/docs/Dynamic.md | 189 ++
 folly/docs/FBString.md | 46 +
 folly/docs/FBVector.md | 242 ++
 folly/docs/Fbvector--graphical_solutions.png | Bin 0 -> 15295 bytes
 folly/docs/Format.md | 181 ++
 folly/docs/GroupVarint.md | 46 +
 folly/docs/Histogram.md | 104 +
 folly/docs/Makefile | 33 +
 folly/docs/Overview.md | 259 ++
 folly/docs/PackedSyncPtr.md | 77 +
 folly/docs/ProducerConsumerQueue.md | 40 +
 folly/docs/SmallLocks.md | 24 +
 folly/docs/Synchronized.md | 606 +++++
 folly/docs/ThreadCachedInt.md | 98 +
 folly/docs/ThreadLocal.md | 106 +
 folly/docs/small_vector.md | 69 +
 folly/docs/style.css | 7 +
 folly/dynamic-inl.h | 850 ++++++
 folly/dynamic.cpp | 40 +
 folly/dynamic.h | 485 ++++
 folly/eventfd.h | 79 +
 folly/experimental/Bits.h | 134 +
 folly/experimental/TestUtil.cpp | 97 +
 folly/experimental/TestUtil.h | 63 +
 folly/experimental/io/Cursor.h | 411 +++
 folly/experimental/io/IOBuf.cpp | 600 +++++
 folly/experimental/io/IOBuf.h | 972 +++++++
 folly/experimental/io/IOBufQueue.cpp | 226 ++
 folly/experimental/io/IOBufQueue.h | 176 ++
 .../experimental/io/test/IOBufCursorTest.cpp | 342 +++
 folly/experimental/io/test/IOBufQueueTest.cpp | 231 ++
 folly/experimental/io/test/IOBufTest.cpp | 525 ++++
 folly/experimental/io/test/Makefile.am | 12 +
 .../experimental/io/test/NetworkBenchmark.cpp | 172 ++
 folly/experimental/test/BitsTest.cpp | 59 +
 folly/experimental/test/TestUtilTest.cpp | 52 +
 folly/folly-config.h | 303 +++
 folly/json.cpp | 696 +++++
 folly/json.h | 118 +
 folly/m4/ac_cxx_compile_stdcxx_0x.m4 | 110 +
 folly/m4/ax_boost_base.m4 | 258 ++
 folly/m4/ax_boost_regex.m4 | 111 +
 folly/m4/ax_boost_thread.m4 | 149 ++
 folly/m4/ax_prefix_config.m4 | 209 ++
 folly/small_vector.h | 1189 +++++++++
 folly/sorted_vector_types.h | 606 +++++
 folly/test/AtomicHashArrayTest.cpp | 95 +
 folly/test/AtomicHashMapTest.cpp | 807 ++++++
 folly/test/BenchmarkTest.cpp | 74 +
 folly/test/BitIteratorTest.cpp | 187 ++
 folly/test/BitsTest.cpp | 157 ++
 folly/test/ConcurrentSkipListBenchmark.cpp | 696 +++++
 folly/test/ConcurrentSkipListTest.cpp | 329 +++
 folly/test/ConvTest.cpp | 692 +++++
 folly/test/DiscriminatedPtrTest.cpp | 125 +
 folly/test/DynamicTest.cpp | 274 ++
 folly/test/EndianTest.cpp | 65 +
 folly/test/EventFDTest.cpp | 73 +
 folly/test/FBStringLibstdcxxStdexceptTest.cpp | 24 +
 folly/test/FBStringTest.cpp | 1057 ++++++++
 folly/test/FBStringTestBenchmarks.cpp.h | 224 ++
 folly/test/FBVectorTest.cpp | 259 ++
 folly/test/FBVectorTestBenchmarks.cpp.h | 379 +++
 folly/test/ForeachTest.cpp | 269 ++
 folly/test/FormatTest.cpp | 410 +++
 folly/test/GroupVarintTest.cpp | 261 ++
 folly/test/HashTest.cpp | 137 +
 folly/test/HistogramTest.cpp | 204 ++
 folly/test/JsonTest.cpp | 355 +++
 folly/test/Makefile.am | 131 +
 folly/test/MapUtilTest.cpp | 34 +
 folly/test/PackedSyncPtrTest.cpp | 133 +
 folly/test/ProducerConsumerQueueTest.cpp | 224 ++
 folly/test/RWSpinLockTest.cpp | 247 ++
 folly/test/RangeTest.cpp | 140 +
 folly/test/ScopeGuardTest.cpp | 258 ++
 folly/test/SmallLocksTest.cpp | 142 +
 folly/test/StringTest.cpp | 645 +++++
 folly/test/SynchronizedTest.cpp | 106 +
 folly/test/SynchronizedTestLib-inl.h | 291 +++
 folly/test/SynchronizedTestLib.h | 47 +
 folly/test/ThreadCachedArenaTest.cpp | 265 ++
 folly/test/ThreadCachedIntTest.cpp | 282 ++
 folly/test/ThreadLocalTest.cpp | 359 +++
 folly/test/TimeoutQueueTest.cpp | 113 +
 folly/test/function_benchmark/Makefile.am | 11 +
 .../function_benchmark/benchmark_impl.cpp | 36 +
 folly/test/function_benchmark/benchmark_impl.h | 35 +
 folly/test/function_benchmark/main.cpp | 196 ++
 .../function_benchmark/test_functions.cpp | 19 +
 folly/test/function_benchmark/test_functions.h | 18 +
 folly/test/small_vector_test.cpp | 752 ++++++
 folly/test/sorted_vector_test.cpp | 273 ++
 164 files changed, 42799 insertions(+)
 create mode 100644 folly/Arena-inl.h
 create mode 100644 folly/Arena.h
 create mode 100644 folly/AtomicHashArray-inl.h
 create mode 100644 folly/AtomicHashArray.h
 create mode 100644 folly/AtomicHashMap-inl.h
 create mode 100644 folly/AtomicHashMap.h
 create mode 100644 folly/Benchmark.cpp
 create mode 100644 folly/Benchmark.h
 create mode 100644 folly/Bits.h
 create mode 100644 folly/ConcurrentSkipList-inl.h
 create mode 100644 folly/ConcurrentSkipList.h
 create mode 100644 folly/Conv.cpp
 create mode 100644 folly/Conv.h
 create mode 100644 folly/DiscriminatedPtr.h
 create mode 100644 folly/FBString.h
 create mode 100644 folly/FBVector.h
 create mode 100644 folly/Foreach.h
 create mode 100644 folly/Format-inl.h
 create mode 100644 folly/Format.cpp
 create mode 100644 folly/Format.h
 create mode 100644 folly/FormatArg.h
 create mode 100644 folly/GroupVarint.cpp
 create mode 100644 folly/GroupVarint.h
 create mode 100644 folly/Hash.h
 create mode 100644 folly/Histogram-inl.h
 create mode 100644 folly/Histogram.h
 create mode 100644 folly/IntrusiveList.h
 create mode 100644 folly/Likely.h
 create mode 100644 folly/Makefile.am
 create mode 100644 folly/Malloc.h
 create mode 100644 folly/MapUtil.h
 create mode 100644 folly/PackedSyncPtr.h
 create mode 100644 folly/Portability.h
 create mode 100644 folly/Preprocessor.h
 create mode 100644 folly/ProducerConsumerQueue.h
 create mode 100644 folly/README
 create mode 100644 folly/RWSpinLock.h
 create mode 100644 folly/Random.cpp
 create mode 100644 folly/Random.h
 create mode 100644 folly/Range.cpp
 create mode 100644 folly/Range.h
 create mode 100644 folly/SConstruct.double-conversion
 create mode 100644 folly/ScopeGuard.h
 create mode 100644 folly/SmallLocks.h
 create mode 100644 folly/StlAllocator.h
 create mode 100644 folly/String-inl.h
 create mode 100644 folly/String.cpp
 create mode 100644 folly/String.h
 create mode 100644 folly/Synchronized.h
 create mode 100644 folly/ThreadCachedArena.cpp
 create mode 100644 folly/ThreadCachedArena.h
 create mode 100644 folly/ThreadCachedInt.h
 create mode 100644 folly/ThreadLocal.h
 create mode 100644 folly/TimeoutQueue.cpp
 create mode 100644 folly/TimeoutQueue.h
 create mode 100644 folly/Traits.h
 create mode 100644 folly/Unicode.cpp
 create mode 100644 folly/Unicode.h
 create mode 100755 folly/build/generate_escape_tables.py
 create mode 100755 folly/build/generate_format_tables.py
 create mode 100755 folly/build/generate_varint_tables.py
 create mode 100644 folly/configure.ac
 create mode 100644 folly/detail/AtomicHashUtils.h
 create mode 100644 folly/detail/BitIteratorDetail.h
 create mode 100644 folly/detail/DiscriminatedPtrDetail.h
 create mode 100644 folly/detail/GroupVarintDetail.h
 create mode 100644 folly/detail/ThreadLocalDetail.h
 create mode 100644 folly/docs/.gitignore
 create mode 100644 folly/docs/AtomicHashMap.md
 create mode 100644 folly/docs/Benchmark.md
 create mode 100644 folly/docs/Conv.md
 create mode 100644 folly/docs/Dynamic.md
 create mode 100644 folly/docs/FBString.md
 create mode 100644 folly/docs/FBVector.md
 create mode 100644 folly/docs/Fbvector--graphical_solutions.png
 create mode 100644 folly/docs/Format.md
 create mode 100644 folly/docs/GroupVarint.md
 create mode 100644 folly/docs/Histogram.md
 create mode 100644 folly/docs/Makefile
 create mode 100644 folly/docs/Overview.md
 create mode 100644 folly/docs/PackedSyncPtr.md
 create mode 100644 folly/docs/ProducerConsumerQueue.md
 create mode 100644 folly/docs/SmallLocks.md
 create mode 100644 folly/docs/Synchronized.md
 create mode 100644 folly/docs/ThreadCachedInt.md
 create mode 100644 folly/docs/ThreadLocal.md
 create mode 100644 folly/docs/small_vector.md
 create mode 100644 folly/docs/style.css
 create mode 100644 folly/dynamic-inl.h
 create mode 100644 folly/dynamic.cpp
 create mode 100644 folly/dynamic.h
 create mode 100644 folly/eventfd.h
 create mode 100644 folly/experimental/Bits.h
 create mode 100644 folly/experimental/TestUtil.cpp
 create mode 100644 folly/experimental/TestUtil.h
 create mode 100644 folly/experimental/io/Cursor.h
 create mode 100644 folly/experimental/io/IOBuf.cpp
 create mode 100644 folly/experimental/io/IOBuf.h
 create mode 100644 folly/experimental/io/IOBufQueue.cpp
 create mode 100644 folly/experimental/io/IOBufQueue.h
 create mode 100644 folly/experimental/io/test/IOBufCursorTest.cpp
 create mode 100644 folly/experimental/io/test/IOBufQueueTest.cpp
 create mode 100644 folly/experimental/io/test/IOBufTest.cpp
 create mode 100644 folly/experimental/io/test/Makefile.am
 create mode 100644 folly/experimental/io/test/NetworkBenchmark.cpp
 create mode 100644 folly/experimental/test/BitsTest.cpp
 create mode 100644 folly/experimental/test/TestUtilTest.cpp
 create mode 100644 folly/folly-config.h
 create mode 100644 folly/json.cpp
 create mode 100644 folly/json.h
 create mode 100644 folly/m4/ac_cxx_compile_stdcxx_0x.m4
 create mode 100644 folly/m4/ax_boost_base.m4
 create mode 100644 folly/m4/ax_boost_regex.m4
 create mode 100644 folly/m4/ax_boost_thread.m4
 create mode 100644 folly/m4/ax_prefix_config.m4
 create mode 100644 folly/small_vector.h
 create mode 100644 folly/sorted_vector_types.h
 create mode 100644 folly/test/AtomicHashArrayTest.cpp
 create mode 100644 folly/test/AtomicHashMapTest.cpp
 create mode 100644 folly/test/BenchmarkTest.cpp
 create mode 100644 folly/test/BitIteratorTest.cpp
 create mode 100644 folly/test/BitsTest.cpp
 create mode 100644 folly/test/ConcurrentSkipListBenchmark.cpp
 create mode 100644 folly/test/ConcurrentSkipListTest.cpp
 create mode 100644 folly/test/ConvTest.cpp
 create mode 100644 folly/test/DiscriminatedPtrTest.cpp
 create mode 100644 folly/test/DynamicTest.cpp
 create mode 100644 folly/test/EndianTest.cpp
 create mode 100644 folly/test/EventFDTest.cpp
 create mode 100644 folly/test/FBStringLibstdcxxStdexceptTest.cpp
 create mode 100644 folly/test/FBStringTest.cpp
 create mode 100644 folly/test/FBStringTestBenchmarks.cpp.h
 create mode 100644 folly/test/FBVectorTest.cpp
 create mode 100644 folly/test/FBVectorTestBenchmarks.cpp.h
 create mode 100644 folly/test/ForeachTest.cpp
 create mode 100644 folly/test/FormatTest.cpp
 create mode 100644 folly/test/GroupVarintTest.cpp
 create mode 100644 folly/test/HashTest.cpp
 create mode 100644 folly/test/HistogramTest.cpp
 create mode 100644 folly/test/JsonTest.cpp
 create mode 100644 folly/test/Makefile.am
 create mode 100644 folly/test/MapUtilTest.cpp
 create mode 100644 folly/test/PackedSyncPtrTest.cpp
 create mode 100644 folly/test/ProducerConsumerQueueTest.cpp
 create mode 100644 folly/test/RWSpinLockTest.cpp
 create mode 100644 folly/test/RangeTest.cpp
 create mode 100644 folly/test/ScopeGuardTest.cpp
 create mode 100644 folly/test/SmallLocksTest.cpp
 create mode 100644 folly/test/StringTest.cpp
 create mode 100644 folly/test/SynchronizedTest.cpp
 create mode 100644 folly/test/SynchronizedTestLib-inl.h
 create mode 100644 folly/test/SynchronizedTestLib.h
 create mode 100644 folly/test/ThreadCachedArenaTest.cpp
 create mode 100644 folly/test/ThreadCachedIntTest.cpp
 create mode 100644 folly/test/ThreadLocalTest.cpp
 create mode 100644 folly/test/TimeoutQueueTest.cpp
 create mode 100644 folly/test/function_benchmark/Makefile.am
 create mode 100644 folly/test/function_benchmark/benchmark_impl.cpp
 create mode 100644 folly/test/function_benchmark/benchmark_impl.h
 create mode 100644 folly/test/function_benchmark/main.cpp
 create mode 100644 folly/test/function_benchmark/test_functions.cpp
 create mode 100644 folly/test/function_benchmark/test_functions.h
 create mode 100644 folly/test/small_vector_test.cpp
 create mode 100644 folly/test/sorted_vector_test.cpp

diff --git a/folly/Arena-inl.h b/folly/Arena-inl.h
new file mode 100644
index 00000000..ad839ae4
--- /dev/null
+++ b/folly/Arena-inl.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_ARENA_H_
+#error This file may only be included from Arena.h
+#endif
+
+// Implementation of Arena.h functions
+
+namespace folly {
+
+template <class Alloc>
+std::pair<typename Arena<Alloc>::Block*, size_t>
+Arena<Alloc>::Block::allocate(Alloc& alloc, size_t size, bool allowSlack) {
+  size_t allocSize = sizeof(Block) + size;
+  if (allowSlack) {
+    allocSize = ArenaAllocatorTraits<Alloc>::goodSize(alloc, allocSize);
+  }
+
+  void* mem = alloc.allocate(allocSize);
+  assert(isAligned(mem));
+  return std::make_pair(new (mem) Block(), allocSize - sizeof(Block));
+}
+
+template <class Alloc>
+void Arena<Alloc>::Block::deallocate(Alloc& alloc) {
+  this->~Block();
+  alloc.deallocate(this);
+}
+
+template <class Alloc>
+void* Arena<Alloc>::allocateSlow(size_t size) {
+  std::pair<Block*, size_t> p;
+  char* start;
+  if (size > minBlockSize()) {
+    // Allocate a large block for this chunk only, put it at the back of the
+    // list so it doesn't get used for small allocations; don't change ptr_
+    // and end_, let them point into a normal block (or none, if they're
+    // null)
+    p = Block::allocate(alloc(), size, false);
+    start = p.first->start();
+    blocks_.push_back(*p.first);
+  } else {
+    // Allocate a normal sized block and carve out size bytes from it
+    p = Block::allocate(alloc(), minBlockSize(), true);
+    start = p.first->start();
+    blocks_.push_front(*p.first);
+    ptr_ = start + size;
+    end_ = start + p.second;
+  }
+
+  assert(p.second >= size);
+  return start;
+}
+
+template <class Alloc>
+void Arena<Alloc>::merge(Arena&& other) {
+  blocks_.splice_after(blocks_.before_begin(), other.blocks_);
+  other.blocks_.clear();
+  other.ptr_ = other.end_ = nullptr;
+}
+
+template <class Alloc>
+Arena<Alloc>::~Arena() {
+  auto disposer = [this] (Block* b) { b->deallocate(this->alloc()); };
+  while (!blocks_.empty()) {
+    blocks_.pop_front_and_dispose(disposer);
+  }
+}
+
+}  // namespace folly

diff --git a/folly/Arena.h b/folly/Arena.h
new file mode 100644
index 00000000..8db3ca5d
--- /dev/null
+++ b/folly/Arena.h
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_ARENA_H_
+#define FOLLY_ARENA_H_
+
+#include <cassert>
+#include <limits>
+#include <utility>
+#include <boost/intrusive/slist.hpp>
+
+#include "folly/Likely.h"
+#include "folly/Malloc.h"
+
+namespace folly {
+
+/**
+ * Simple arena: allocate memory which gets freed when the arena gets
+ * destroyed.
+ *
+ * The arena itself allocates memory using a custom allocator which provides
+ * the following interface (same as required by StlAllocator in StlAllocator.h)
+ *
+ *   void* allocate(size_t size);
+ *      Allocate a block of size bytes, properly aligned to the maximum
+ *      alignment required on your system; throw std::bad_alloc if the
+ *      allocation can't be satisfied.
+ *
+ *   void deallocate(void* ptr);
+ *      Deallocate a previously allocated block.
+ *
+ * You may also specialize ArenaAllocatorTraits for your allocator type to
+ * provide:
+ *
+ *   size_t goodSize(const Allocator& alloc, size_t size) const;
+ *      Return a size (>= the provided size) that is considered "good" for your
+ *      allocator (for example, if your allocator allocates memory in 4MB
+ *      chunks, size should be rounded up to 4MB).  The provided value is
+ *      guaranteed to be rounded up to a multiple of the maximum alignment
+ *      required on your system; the returned value must be also.
+ *
+ * An implementation that uses malloc() / free() is defined below, see
+ * SysAlloc / SysArena.
+ */
+template <class Alloc> struct ArenaAllocatorTraits;
+template <class Alloc>
+class Arena {
+ public:
+  explicit Arena(const Alloc& alloc,
+                 size_t minBlockSize = kDefaultMinBlockSize)
+    : allocAndSize_(alloc, minBlockSize),
+      ptr_(nullptr),
+      end_(nullptr) {
+  }
+
+  ~Arena();
+
+  void* allocate(size_t size) {
+    size = roundUp(size);
+
+    if (LIKELY(end_ - ptr_ >= size)) {
+      // Fast path: there's enough room in the current block
+      char* r = ptr_;
+      ptr_ += size;
+      assert(isAligned(r));
+      return r;
+    }
+
+    // Not enough room in the current block
+    void* r = allocateSlow(size);
+    assert(isAligned(r));
+    return r;
+  }
+
+  void deallocate(void* p) {
+    // Deallocate? Never!
+  }
+
+  // Transfer ownership of all memory allocated from "other" to "this".
+  void merge(Arena&& other);
+
+ private:
+  // not copyable
+  Arena(const Arena&) = delete;
+  Arena& operator=(const Arena&) = delete;
+
+  // movable
+  Arena(Arena&&) = default;
+  Arena& operator=(Arena&&) = default;
+
+  struct Block;
+  typedef boost::intrusive::slist_member_hook<
+    boost::intrusive::tag<Arena>> BlockLink;
+
+  struct Block {
+    BlockLink link;
+
+    // Allocate a block with at least size bytes of storage.
+    // If allowSlack is true, allocate more than size bytes if convenient
+    // (via ArenaAllocatorTraits::goodSize()) as we'll try to pack small
+    // allocations in this block.
+    static std::pair<Block*, size_t> allocate(
+        Alloc& alloc, size_t size, bool allowSlack);
+    void deallocate(Alloc& alloc);
+
+    char* start() {
+      return reinterpret_cast<char*>(this + 1);
+    }
+
+   private:
+    Block() { }
+    ~Block() { }
+  } __attribute__((aligned));
+  // This should be alignas(std::max_align_t) but neither alignas nor
+  // max_align_t are supported by gcc 4.6.2.
+
+ public:
+  static constexpr size_t kDefaultMinBlockSize = 4096 - sizeof(Block);
+
+ private:
+  static constexpr size_t maxAlign = alignof(Block);
+  static constexpr bool isAligned(uintptr_t address) {
+    return (address & (maxAlign - 1)) == 0;
+  }
+  static bool isAligned(void* p) {
+    return isAligned(reinterpret_cast<uintptr_t>(p));
+  }
+
+  // Round up size so it's properly aligned
+  static constexpr size_t roundUp(size_t size) {
+    return (size + maxAlign - 1) & ~(maxAlign - 1);
+  }
+
+  // cache_last<true> makes the list keep a pointer to the last element, so we
+  // have push_back() and constant time splice_after()
+  typedef boost::intrusive::slist<
+    Block,
+    boost::intrusive::member_hook<Block, BlockLink, &Block::link>,
+    boost::intrusive::constant_time_size<false>,
+    boost::intrusive::cache_last<true>> BlockList;
+
+  void* allocateSlow(size_t size);
+
+  // Empty member optimization: package Alloc with a non-empty member
+  // in case Alloc is empty (as it is in the case of SysAlloc).
+  struct AllocAndSize : public Alloc {
+    explicit AllocAndSize(const Alloc& a, size_t s)
+      : Alloc(a), minBlockSize(s) {
+    }
+
+    size_t minBlockSize;
+  };
+
+  size_t minBlockSize() const {
+    return allocAndSize_.minBlockSize;
+  }
+  Alloc& alloc() { return allocAndSize_; }
+  const Alloc& alloc() const { return allocAndSize_; }
+
+  AllocAndSize allocAndSize_;
+  BlockList blocks_;
+  char* ptr_;
+  char* end_;
+};
+
+/**
+ * By default, don't pad the given size.
+ */
+template <class Alloc>
+struct ArenaAllocatorTraits {
+  static size_t goodSize(const Alloc& alloc, size_t size) {
+    return size;
+  }
+};
+
+/**
+ * Arena-compatible allocator that calls malloc() and free(); see
+ * goodMallocSize() in Malloc.h for goodSize().
+ */
+class SysAlloc {
+ public:
+  void* allocate(size_t size) {
+    void* mem = malloc(size);
+    if (!mem) throw std::bad_alloc();
+    return mem;
+  }
+
+  void deallocate(void* p) {
+    free(p);
+  }
+};
+
+template <>
+struct ArenaAllocatorTraits<SysAlloc> {
+  static size_t goodSize(const SysAlloc& alloc, size_t size) {
+    return goodMallocSize(size);
+  }
+};
+
+/**
+ * Arena that uses the system allocator (malloc / free)
+ */
+class SysArena : public Arena<SysAlloc> {
+ public:
+  explicit SysArena(size_t minBlockSize = kDefaultMinBlockSize)
+    : Arena<SysAlloc>(SysAlloc(), minBlockSize) {
+  }
+};
+
+}  // namespace folly
+
+#include "folly/Arena-inl.h"
+
+#endif /* FOLLY_ARENA_H_ */
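Arena.h and Arena-inl.h are self-contained, so a short usage sketch may help orient reviewers. This example is illustrative only and not part of the patch; the sizes are arbitrary:

```cpp
#include "folly/Arena.h"

int main() {
  // SysArena = Arena<SysAlloc>: malloc()-backed, ~4KB blocks by default.
  folly::SysArena arena;

  // Fast path: bump-pointer allocation out of the current block.
  char* buf = static_cast<char*>(arena.allocate(128));

  // Larger than the minimum block size: gets a dedicated block (allocateSlow).
  void* big = arena.allocate(1 << 20);

  // deallocate() is a no-op; all memory is freed when `arena` is destroyed.
  arena.deallocate(buf);
  (void)big;
  return 0;
}
```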
diff --git a/folly/AtomicHashArray-inl.h b/folly/AtomicHashArray-inl.h
new file mode 100644
index 00000000..936939ff
--- /dev/null
+++ b/folly/AtomicHashArray-inl.h
@@ -0,0 +1,366 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_ATOMICHASHARRAY_H_
+#error "This should only be included by AtomicHashArray.h"
+#endif
+
+#include "folly/Bits.h"
+#include "folly/detail/AtomicHashUtils.h"
+
+namespace folly {
+
+// AtomicHashArray private constructor --
+template <class KeyT, class ValueT, class HashFcn>
+AtomicHashArray<KeyT, ValueT, HashFcn>::
+AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey,
+                KeyT erasedKey, double maxLoadFactor, size_t cacheSize)
+    : capacity_(capacity), maxEntries_(size_t(maxLoadFactor * capacity_ + 0.5)),
+      kEmptyKey_(emptyKey), kLockedKey_(lockedKey), kErasedKey_(erasedKey),
+      kAnchorMask_(nextPowTwo(capacity_) - 1), numEntries_(0, cacheSize),
+      numPendingEntries_(0, cacheSize), isFull_(0), numErases_(0) {
+}
+
+/*
+ * findInternal --
+ *
+ *   Sets ret.second to value found and ret.index to index
+ *   of key and returns true, or if key does not exist returns false and
+ *   ret.index is set to capacity_.
+ */
+template <class KeyT, class ValueT, class HashFcn>
+typename AtomicHashArray<KeyT, ValueT, HashFcn>::SimpleRetT
+AtomicHashArray<KeyT, ValueT, HashFcn>::
+findInternal(const KeyT key_in) {
+  DCHECK_NE(key_in, kEmptyKey_);
+  DCHECK_NE(key_in, kLockedKey_);
+  DCHECK_NE(key_in, kErasedKey_);
+  for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0;
+       ;
+       idx = probeNext(idx, numProbes)) {
+    const KeyT key = relaxedLoadKey(cells_[idx]);
+    if (LIKELY(key == key_in)) {
+      return SimpleRetT(idx, true);
+    }
+    if (UNLIKELY(key == kEmptyKey_)) {
+      // if we hit an empty element, this key does not exist
+      return SimpleRetT(capacity_, false);
+    }
+    ++numProbes;
+    if (UNLIKELY(numProbes >= capacity_)) {
+      // probed every cell...fail
+      return SimpleRetT(capacity_, false);
+    }
+  }
+}
+
+/*
+ * insertInternal --
+ *
+ *   Returns false on failure due to key collision or full.
+ *   Also sets ret.index to the index of the key.  If the map is full, sets
+ *   ret.index = capacity_.  Also sets ret.second to cell value, thus if insert
+ *   successful this will be what we just inserted, if there is a key collision
+ *   this will be the previously inserted value, and if the map is full it is
+ *   default.
+ */
+template <class KeyT, class ValueT, class HashFcn>
+typename AtomicHashArray<KeyT, ValueT, HashFcn>::SimpleRetT
+AtomicHashArray<KeyT, ValueT, HashFcn>::
+insertInternal(const value_type& record) {
+  const short NO_NEW_INSERTS = 1;
+  const short NO_PENDING_INSERTS = 2;
+  const KeyT key_in = record.first;
+  CHECK_NE(key_in, kEmptyKey_);
+  CHECK_NE(key_in, kLockedKey_);
+  CHECK_NE(key_in, kErasedKey_);
+  for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0;
+       ;
+       idx = probeNext(idx, numProbes)) {
+    DCHECK_LT(idx, capacity_);
+    value_type* cell = &cells_[idx];
+    if (relaxedLoadKey(*cell) == kEmptyKey_) {
+      // NOTE: isFull_ is set based on numEntries_.readFast(), so it's
+      // possible to insert more than maxEntries_ entries. However, it's not
+      // possible to insert past capacity_.
+      ++numPendingEntries_;
+      if (isFull_.load(std::memory_order_acquire)) {
+        --numPendingEntries_;
+
+        // Before deciding whether this insert succeeded, this thread needs to
+        // wait until no other thread can add a new entry.
+
+        // Correctness assumes isFull_ is true at this point. If
+        // another thread now does ++numPendingEntries_, we expect it
+        // to pass the isFull_.load() test above. (It shouldn't insert
+        // a new entry.)
+        FOLLY_SPIN_WAIT(
+          isFull_.load(std::memory_order_acquire) != NO_PENDING_INSERTS
+            && numPendingEntries_.readFull() != 0
+        );
+        isFull_.store(NO_PENDING_INSERTS, std::memory_order_release);
+
+        if (relaxedLoadKey(*cell) == kEmptyKey_) {
+          // Don't insert past max load factor
+          return SimpleRetT(capacity_, false);
+        }
+      } else {
+        // An unallocated cell. Try once to lock it. If we succeed, insert here.
+        // If we fail, fall through to comparison below; maybe the insert that
+        // just beat us was for this very key....
+        if (tryLockCell(cell)) {
+          // Write the value - done before unlocking
+          try {
+            DCHECK(relaxedLoadKey(*cell) == kLockedKey_);
+            /*
+             * This happens using the copy constructor because we won't have
+             * constructed a lhs to use an assignment operator on when
+             * values are being set.
+             */
+            new (&cell->second) ValueT(record.second);
+            unlockCell(cell, key_in); // Sets the new key
+          } catch (...) {
+            unlockCell(cell, kEmptyKey_);
+            --numPendingEntries_;
+            throw;
+          }
+          DCHECK(relaxedLoadKey(*cell) == key_in);
+          --numPendingEntries_;
+          ++numEntries_;  // This is a thread cached atomic increment :)
+          if (numEntries_.readFast() >= maxEntries_) {
+            isFull_.store(NO_NEW_INSERTS, std::memory_order_relaxed);
+          }
+          return SimpleRetT(idx, true);
+        }
+        --numPendingEntries_;
+      }
+    }
+    DCHECK(relaxedLoadKey(*cell) != kEmptyKey_);
+    if (kLockedKey_ == cellKeyPtr(*cell)->load(std::memory_order_acquire)) {
+      FOLLY_SPIN_WAIT(
+        kLockedKey_ == cellKeyPtr(*cell)->load(std::memory_order_acquire)
+      );
+    }
+    DCHECK(relaxedLoadKey(*cell) != kEmptyKey_);
+    DCHECK(relaxedLoadKey(*cell) != kLockedKey_);
+    if (key_in == relaxedLoadKey(*cell)) {
+      // Found an existing entry for our key, but we don't overwrite the
+      // previous value.
+      return SimpleRetT(idx, false);
+    }
+    ++numProbes;
+    if (UNLIKELY(numProbes >= capacity_)) {
+      // probed every cell...fail
+      return SimpleRetT(capacity_, false);
+    }
+  }
+}
+
+/*
+ * erase --
+ *
+ *   This will attempt to erase the given key key_in if the key is found. It
+ *   returns 1 iff the key was located and marked as erased, and 0 otherwise.
+ *
+ *   Memory is not freed or reclaimed by erase, i.e. the cell containing the
+ *   erased key will never be reused. If there's an associated value, we won't
+ *   touch it either.
+ */
+template <class KeyT, class ValueT, class HashFcn>
+size_t AtomicHashArray<KeyT, ValueT, HashFcn>::
+erase(KeyT key_in) {
+  CHECK_NE(key_in, kEmptyKey_);
+  CHECK_NE(key_in, kLockedKey_);
+  CHECK_NE(key_in, kErasedKey_);
+  for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0;
+       ;
+       idx = probeNext(idx, numProbes)) {
+    DCHECK_LT(idx, capacity_);
+    value_type* cell = &cells_[idx];
+    if (relaxedLoadKey(*cell) == kEmptyKey_ ||
+        relaxedLoadKey(*cell) == kLockedKey_) {
+      // If we hit an empty (or locked) element, this key does not exist. This
+      // is similar to how it's handled in find().
+      return 0;
+    }
+    if (key_in == relaxedLoadKey(*cell)) {
+      // Found an existing entry for our key, attempt to mark it erased.
+      // Some other thread may have erased our key, but this is ok.
+      KeyT expect = key_in;
+      if (cellKeyPtr(*cell)->compare_exchange_strong(expect, kErasedKey_)) {
+        numErases_.fetch_add(1, std::memory_order_relaxed);
+
+        // Even if there's a value in the cell, we won't delete (or even
+        // default construct) it because some other thread may be accessing it.
+        // Locking it meanwhile won't work either since another thread may be
+        // holding a pointer to it.
+
+        // We found the key and successfully erased it.
+        return 1;
+      }
+      // If another thread succeeds in erasing our key, we'll stop our search.
+      return 0;
+    }
+    ++numProbes;
+    if (UNLIKELY(numProbes >= capacity_)) {
+      // probed every cell...fail
+      return 0;
+    }
+  }
+}
+
+template <class KeyT, class ValueT, class HashFcn>
+const typename AtomicHashArray<KeyT, ValueT, HashFcn>::Config
+AtomicHashArray<KeyT, ValueT, HashFcn>::defaultConfig;
+
+template <class KeyT, class ValueT, class HashFcn>
+typename AtomicHashArray<KeyT, ValueT, HashFcn>::SmartPtr
+AtomicHashArray<KeyT, ValueT, HashFcn>::
+create(size_t maxSize, const Config& c) {
+  CHECK_LE(c.maxLoadFactor, 1.0);
+  CHECK_GT(c.maxLoadFactor, 0.0);
+  CHECK_NE(c.emptyKey, c.lockedKey);
+  size_t capacity = size_t(maxSize / c.maxLoadFactor);
+  size_t sz = sizeof(AtomicHashArray) + sizeof(value_type) * capacity;
+
+  std::unique_ptr<void, void(*)(void*)> mem(malloc(sz), free);
+  new(mem.get()) AtomicHashArray(capacity, c.emptyKey, c.lockedKey, c.erasedKey,
+                                 c.maxLoadFactor, c.entryCountThreadCacheSize);
+  SmartPtr map(static_cast<AtomicHashArray*>(mem.release()));
+
+  /*
+   * Mark all cells as empty.
+   *
+   * Note: we're bending the rules a little here accessing the key
+   * element in our cells even though the cell object has not been
+   * constructed, and casting them to atomic objects (see cellKeyPtr).
+   * (Also, in fact we never actually invoke the value_type
+   * constructor.)  This is in order to avoid needing to default
+   * construct a bunch of value_type when we first start up: if you
+   * have an expensive default constructor for the value type this can
+   * noticeably speed construction time for an AHA.
+   */
+  FOR_EACH_RANGE(i, 0, map->capacity_) {
+    cellKeyPtr(map->cells_[i])->store(map->kEmptyKey_,
+                                      std::memory_order_relaxed);
+  }
+  return map;
+}
+
+template <class KeyT, class ValueT, class HashFcn>
+void AtomicHashArray<KeyT, ValueT, HashFcn>::
+destroy(AtomicHashArray* p) {
+  assert(p);
+  FOR_EACH_RANGE(i, 0, p->capacity_) {
+    if (p->cells_[i].first != p->kEmptyKey_) {
+      p->cells_[i].~value_type();
+    }
+  }
+  p->~AtomicHashArray();
+  free(p);
+}
+
+// clear -- clears all keys and values in the map and resets all counters
+template <class KeyT, class ValueT, class HashFcn>
+void AtomicHashArray<KeyT, ValueT, HashFcn>::
+clear() {
+  FOR_EACH_RANGE(i, 0, capacity_) {
+    if (cells_[i].first != kEmptyKey_) {
+      cells_[i].~value_type();
+      *const_cast<KeyT*>(&cells_[i].first) = kEmptyKey_;
+    }
+    CHECK(cells_[i].first == kEmptyKey_);
+  }
+  numEntries_.set(0);
+  numPendingEntries_.set(0);
+  isFull_.store(0, std::memory_order_relaxed);
+  numErases_.store(0, std::memory_order_relaxed);
+}
+
+
+// Iterator implementation
+
+template <class KeyT, class ValueT, class HashFcn>
+template <class ContT, class IterVal>
+struct AtomicHashArray<KeyT, ValueT, HashFcn>::aha_iterator
+    : boost::iterator_facade<aha_iterator<ContT, IterVal>,
+                             IterVal,
+                             boost::forward_traversal_tag>
+{
+  explicit aha_iterator() : aha_(0) {}
+
+  // Conversion ctor for interoperability between const_iterator and
+  // iterator.  The enable_if<> magic keeps us well-behaved for
+  // is_convertible<> (v. the iterator_facade documentation).
+  template<class OtherContT, class OtherVal>
+  aha_iterator(const aha_iterator<OtherContT, OtherVal>& o,
+               typename std::enable_if<
+                 std::is_convertible<OtherVal*, IterVal*>::value >::type* = 0)
+      : aha_(o.aha_)
+      , offset_(o.offset_)
+  {}
+
+  explicit aha_iterator(ContT* array, size_t offset)
+      : aha_(array)
+      , offset_(offset)
+  {
+    advancePastEmpty();
+  }
+
+  // Returns unique index that can be used with findAt().
+  // WARNING: The following function will fail silently for hashtable
+  // with capacity > 2^32
+  uint32_t getIndex() const { return offset_; }
+
+ private:
+  friend class AtomicHashArray;
+  friend class boost::iterator_core_access;
+
+  void increment() {
+    ++offset_;
+    advancePastEmpty();
+  }
+
+  bool equal(const aha_iterator& o) const {
+    return aha_ == o.aha_ && offset_ == o.offset_;
+  }
+
+  IterVal& dereference() const {
+    return aha_->cells_[offset_];
+  }
+
+  void advancePastEmpty() {
+    while (offset_ < aha_->capacity_ && !isValid()) {
+      ++offset_;
+    }
+  }
+
+  bool isValid() const {
+    KeyT key = relaxedLoadKey(aha_->cells_[offset_]);
+    return key != aha_->kEmptyKey_ &&
+      key != aha_->kLockedKey_ &&
+      key != aha_->kErasedKey_;
+  }
+
+ private:
+  ContT* aha_;
+  size_t offset_;
+}; // aha_iterator
+
+} // namespace folly
+
+#undef FOLLY_SPIN_WAIT
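Since the array has no public constructor, all direct use goes through create(). A hedged sketch of direct AHA use, assuming the API implemented above (the keys and values here are arbitrary):

```cpp
#include "folly/AtomicHashArray.h"

typedef folly::AtomicHashArray<int64_t, int64_t> AHA;

void example() {
  // create() returns a SmartPtr whose deleter calls AtomicHashArray::destroy.
  AHA::SmartPtr aha = AHA::create(4096 /* max entries expected */);

  // Keys must avoid the sentinels (-1, -2, -3 with the default Config).
  std::pair<AHA::iterator, bool> ins = aha->insert(AHA::value_type(10, 100));

  AHA::iterator it = aha->find(10);  // wait-free lookup
  if (it != aha->end()) {
    ++it->second;  // synchronizing concurrent value mutation is the caller's job
  }
  aha->erase(10);  // tombstones the cell; it is never reused
}
```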
diff --git a/folly/AtomicHashArray.h b/folly/AtomicHashArray.h
new file mode 100644
index 00000000..640605e4
--- /dev/null
+++ b/folly/AtomicHashArray.h
@@ -0,0 +1,283 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * AtomicHashArray is the building block for AtomicHashMap.  It provides the
+ * core lock-free functionality, but is limited by the fact that it cannot
+ * grow past its initialization size and is a little more awkward (no public
+ * constructor, for example).  If you're confident that you won't run out of
+ * space, don't mind the awkwardness, and really need bare-metal performance,
+ * feel free to use AHA directly.
+ *
+ * Check out AtomicHashMap.h for more thorough documentation on perf and
+ * general pros and cons relative to other hash maps.
+ *
+ * @author Spencer Ahrens
+ * @author Jordan DeLong
+ */
+
+#ifndef FOLLY_ATOMICHASHARRAY_H_
+#define FOLLY_ATOMICHASHARRAY_H_
+
+#include <atomic>
+
+#include <boost/iterator/iterator_facade.hpp>
+#include <boost/noncopyable.hpp>
+
+#include "folly/Hash.h"
+#include "folly/ThreadCachedInt.h"
+
+namespace folly {
+
+template <class KeyT, class ValueT, class HashFcn = std::hash<KeyT>>
+class AtomicHashMap;
+
+template <class KeyT, class ValueT, class HashFcn = std::hash<KeyT>>
+class AtomicHashArray : boost::noncopyable {
+  static_assert((std::is_convertible<KeyT,int32_t>::value ||
+                 std::is_convertible<KeyT,int64_t>::value),
+             "You are trying to use AtomicHashArray with disallowed key "
+             "types.  You must use atomically compare-and-swappable integer "
+             "keys, or a different container class.");
+ public:
+  typedef KeyT                key_type;
+  typedef ValueT              mapped_type;
+  typedef std::pair<const KeyT, ValueT> value_type;
+  typedef std::size_t         size_type;
+  typedef std::ptrdiff_t      difference_type;
+  typedef value_type&         reference;
+  typedef const value_type&   const_reference;
+  typedef value_type*         pointer;
+  typedef const value_type*   const_pointer;
+
+  const size_t  capacity_;
+  const size_t  maxEntries_;
+  const KeyT    kEmptyKey_;
+  const KeyT    kLockedKey_;
+  const KeyT    kErasedKey_;
+
+  template<class ContT, class IterVal>
+  struct aha_iterator;
+
+  typedef aha_iterator<const AtomicHashArray, const value_type> const_iterator;
+  typedef aha_iterator<AtomicHashArray, value_type> iterator;
+
+  // You really shouldn't need this if you use the SmartPtr provided by create,
+  // but if you really want to do something crazy like stick the released
+  // pointer into a DiscriminatedPtr or something, you'll need this to clean up
+  // after yourself.
+  static void destroy(AtomicHashArray*);
+
+ private:
+  const size_t  kAnchorMask_;
+
+  struct Deleter {
+    void operator()(AtomicHashArray* ptr) {
+      AtomicHashArray::destroy(ptr);
+    }
+  };
+
+ public:
+  typedef std::unique_ptr<AtomicHashArray, Deleter> SmartPtr;
+
+  /*
+   * create --
+   *
+   *   Creates AtomicHashArray objects.  Use instead of constructor/destructor.
+   *
+   *   We do things this way in order to avoid the perf penalty of a second
+   *   pointer indirection when composing these into AtomicHashMap, which needs
+   *   to store an array of pointers so that it can perform atomic operations
+   *   on them when growing.
+   *
+   *   Instead of a mess of arguments, we take a max size and a Config struct
+   *   to simulate named ctor parameters.  The Config struct has sensible
+   *   defaults for everything, but is overloaded - if you specify a positive
+   *   capacity, that will be used directly instead of computing it based on
+   *   maxLoadFactor.
+   *
+   *   Create returns an AHA::SmartPtr which is a unique_ptr with a custom
+   *   deleter to make sure everything is cleaned up properly.
+   */
+  struct Config {
+    KeyT   emptyKey;
+    KeyT   lockedKey;
+    KeyT   erasedKey;
+    double maxLoadFactor;
+    int    entryCountThreadCacheSize;
+    size_t capacity; // if positive, overrides maxLoadFactor
+
+    constexpr Config() : emptyKey(static_cast<KeyT>(-1ul)),
+                         lockedKey(static_cast<KeyT>(-2ul)),
+                         erasedKey(static_cast<KeyT>(-3ul)),
+                         maxLoadFactor(0.8),
+                         entryCountThreadCacheSize(1000),
+                         capacity(0) {}
+  };
+
+  static const Config defaultConfig;
+  static SmartPtr create(size_t maxSize, const Config& = defaultConfig);
+
+  iterator find(KeyT k) {
+    return iterator(this, findInternal(k).idx);
+  }
+  const_iterator find(KeyT k) const {
+    return const_cast<AtomicHashArray*>(this)->find(k);
+  }
+
+  /*
+   * insert --
+   *
+   *   Returns a pair with iterator to the element at r.first and bool success.
+   *   Retrieve the index with ret.first.getIndex().
+   *
+   *   Fails on key collision (does not overwrite) or if map becomes
+   *   full, at which point no element is inserted, iterator is set to end(),
+   *   and success is set false.  On collisions, success is set false, but the
+   *   iterator is set to the existing entry.
+   */
+  std::pair<iterator,bool> insert(const value_type& r) {
+    SimpleRetT ret = insertInternal(r);
+    return std::make_pair(iterator(this, ret.idx), ret.success);
+  }
+
+  // returns the number of elements erased - should never exceed 1
+  size_t erase(KeyT k);
+
+  // clears all keys and values in the map and resets all counters.  Not thread
+  // safe.
+  void clear();
+
+  // Exact number of elements in the map - note that readFull() acquires a
+  // mutex.  See folly/ThreadCachedInt.h for more details.
+  size_t size() const {
+    return numEntries_.readFull() -
+      numErases_.load(std::memory_order_relaxed);
+  }
+
+  bool empty() const { return size() == 0; }
+
+  iterator begin()             { return iterator(this, 0); }
+  iterator end()               { return iterator(this, capacity_); }
+  const_iterator begin() const { return const_iterator(this, 0); }
+  const_iterator end() const   { return const_iterator(this, capacity_); }
+
+  // See AtomicHashMap::findAt - access elements directly
+  // WARNING: The following 2 functions will fail silently for hashtable
+  // with capacity > 2^32
+  iterator findAt(uint32_t idx) {
+    DCHECK_LT(idx, capacity_);
+    return iterator(this, idx);
+  }
+  const_iterator findAt(uint32_t idx) const {
+    return const_cast<AtomicHashArray*>(this)->findAt(idx);
+  }
+
+  iterator makeIter(size_t idx) { return iterator(this, idx); }
+  const_iterator makeIter(size_t idx) const {
+    return const_iterator(this, idx);
+  }
+
+  // The max load factor allowed for this map
+  double maxLoadFactor() const { return ((double) maxEntries_) / capacity_; }
+
+  void setEntryCountThreadCacheSize(uint32_t newSize) {
+    numEntries_.setCacheSize(newSize);
+    numPendingEntries_.setCacheSize(newSize);
+  }
+
+  int getEntryCountThreadCacheSize() const {
+    return numEntries_.getCacheSize();
+  }
+
+  /* Private data and helper functions... */
+
+ private:
+  friend class AtomicHashMap<KeyT, ValueT, HashFcn>;
+
+  struct SimpleRetT { size_t idx; bool success;
+    SimpleRetT(size_t i, bool s) : idx(i), success(s) {}
+    SimpleRetT() {}
+  };
+
+  SimpleRetT insertInternal(const value_type& record);
+
+  SimpleRetT findInternal(const KeyT key);
+
+  static std::atomic<KeyT>* cellKeyPtr(const value_type& r) {
+    // We need some illegal casting here in order to actually store
+    // our value_type as a std::pair<const,atomic>.  But a little bit of
+    // undefined behavior never hurt anyone ...
+    static_assert(sizeof(std::atomic<KeyT>) == sizeof(KeyT),
+                  "std::atomic is implemented in an unexpected way for AHM");
+    return
+      const_cast<std::atomic<KeyT>*>(
+        reinterpret_cast<std::atomic<KeyT> const*>(&r.first));
+  }
+
+  static KeyT relaxedLoadKey(const value_type& r) {
+    return cellKeyPtr(r)->load(std::memory_order_relaxed);
+  }
+
+  // Fun with thread local storage - atomic increment is expensive
+  // (relatively), so we accumulate in the thread cache and periodically
+  // flush to the actual variable, and walk through the unflushed counts when
+  // reading the value, so be careful of calling size() too frequently.  This
+  // increases insertion throughput several times over while keeping the count
+  // accurate.
+  ThreadCachedInt<uint64_t> numEntries_;  // Successful key inserts
+  ThreadCachedInt<uint64_t> numPendingEntries_; // Used by insertInternal
+  std::atomic<int64_t> isFull_; // Used by insertInternal
+  std::atomic<int64_t> numErases_;   // Successful key erases
+
+  value_type cells_[0];  // This must be the last field of this class
+
+  // Force constructor/destructor private since create/destroy should be
+  // used externally instead
+  AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey,
+                  KeyT erasedKey, double maxLoadFactor, size_t cacheSize);
+
+  ~AtomicHashArray() {}
+
+  inline void unlockCell(value_type* const cell, KeyT newKey) {
+    cellKeyPtr(*cell)->store(newKey, std::memory_order_release);
+  }
+
+  inline bool tryLockCell(value_type* const cell) {
+    KeyT expect = kEmptyKey_;
+    return cellKeyPtr(*cell)->compare_exchange_strong(expect, kLockedKey_,
+                                                      std::memory_order_acquire);
+  }
+
+  inline size_t keyToAnchorIdx(const KeyT k) const {
+    const size_t hashVal = HashFcn()(k);
+    const size_t probe = hashVal & kAnchorMask_;
+    return LIKELY(probe < capacity_) ? probe : hashVal % capacity_;
+  }
+
+  inline size_t probeNext(size_t idx, size_t numProbes) {
+    //idx += numProbes; // quadratic probing
+    idx += 1; // linear probing
+    // Avoid modulus because it's slow
+    return LIKELY(idx < capacity_) ? idx : (idx - capacity_);
+  }
+}; // AtomicHashArray
+
+} // namespace folly
+
+#include "AtomicHashArray-inl.h"
+
+#endif // FOLLY_ATOMICHASHARRAY_H_
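The Config defaults above reserve -1/-2/-3 as sentinel keys, which silently conflicts with maps that need to store those values; a sketch of overriding the sentinels (the particular values chosen here are arbitrary, not from the patch):

```cpp
#include "folly/AtomicHashArray.h"

typedef folly::AtomicHashArray<int32_t, int32_t> Map;

Map::SmartPtr makeMap() {
  Map::Config config;
  // Three distinct values that real keys can never take.
  config.emptyKey  = 0x7ffffffd;
  config.lockedKey = 0x7ffffffe;
  config.erasedKey = 0x7fffffff;
  // A lower load factor gives shorter probe chains at the cost of memory.
  config.maxLoadFactor = 0.7;
  return Map::create(100000, config);
}
```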
diff --git a/folly/AtomicHashMap-inl.h b/folly/AtomicHashMap-inl.h
new file mode 100644
index 00000000..f2738649
--- /dev/null
+++ b/folly/AtomicHashMap-inl.h
@@ -0,0 +1,402 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_ATOMICHASHMAP_H_
+#error "This should only be included by AtomicHashMap.h"
+#endif
+
+#include "folly/detail/AtomicHashUtils.h"
+
+namespace folly {
+
+template <typename KeyT, typename ValueT, typename HashFcn>
+const typename AtomicHashMap<KeyT, ValueT, HashFcn>::Config
+AtomicHashMap<KeyT, ValueT, HashFcn>::defaultConfig;
+
+// AtomicHashMap constructor -- Atomic wrapper that allows growth
+// This class has a lot of overhead (184 Bytes) so only use for big maps
+template <typename KeyT, typename ValueT, typename HashFcn>
+AtomicHashMap<KeyT, ValueT, HashFcn>::
+AtomicHashMap(size_t size, const Config& config)
+  : kGrowthFrac_(1.0 - config.maxLoadFactor) {
+  CHECK(config.maxLoadFactor > 0.0 && config.maxLoadFactor < 1.0);
+  subMaps_[0].store(SubMap::create(size, config).release(),
+    std::memory_order_relaxed);
+  auto numSubMaps = kNumSubMaps_;
+  FOR_EACH_RANGE(i, 1, numSubMaps) {
+    subMaps_[i].store(nullptr, std::memory_order_relaxed);
+  }
+  numMapsAllocated_.store(1, std::memory_order_relaxed);
+}
+
+// insert --
+template <typename KeyT, typename ValueT, typename HashFcn>
+std::pair<typename AtomicHashMap<KeyT, ValueT, HashFcn>::iterator, bool>
+AtomicHashMap<KeyT, ValueT, HashFcn>::
+insert(const value_type& r) {
+  SimpleRetT ret = insertInternal(r);
+  SubMap* subMap = subMaps_[ret.i].load(std::memory_order_relaxed);
+  return std::make_pair(iterator(this, ret.i, subMap->makeIter(ret.j)),
+                        ret.success);
+}
+
+// insertInternal -- Allocates new sub maps as existing ones fill up.
+template <typename KeyT, typename ValueT, typename HashFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn>::SimpleRetT
+AtomicHashMap<KeyT, ValueT, HashFcn>::
+insertInternal(const value_type& r) {
+ beginInsertInternal:
+  int nextMapIdx = // this maintains our state
+    numMapsAllocated_.load(std::memory_order_acquire);
+  uint32_t idx = 0;
+  typename SubMap::SimpleRetT ret;
+  FOR_EACH_RANGE(i, 0, nextMapIdx) {
+    // insert in each map successively.  If one succeeds, we're done!
+    SubMap* subMap = subMaps_[i].load(std::memory_order_relaxed);
+    ret = subMap->insertInternal(r);
+    if (ret.idx == subMap->capacity_) {
+      continue;  //map is full, so try the next one
+    }
+    // Either collision or success - insert in either case
+    return SimpleRetT(i, ret.idx, ret.success);
+  }
+
+  // If we made it this far, all maps are full and we need to try to allocate
+  // the next one.
+
+  SubMap* primarySubMap = subMaps_[0].load(std::memory_order_relaxed);
+  if (nextMapIdx >= kNumSubMaps_ ||
+      primarySubMap->capacity_ * kGrowthFrac_ < 1.0) {
+    // Can't allocate any more sub maps.
+    throw AtomicHashMapFullError();
+  }
+
+  if (tryLockMap(nextMapIdx)) {
+    // Alloc a new map and shove it in.  We can change whatever
+    // we want because other threads are waiting on us...
+    size_t numCellsAllocated = (size_t)
+      (primarySubMap->capacity_ *
+       std::pow(1.0 + kGrowthFrac_, nextMapIdx - 1));
+    size_t newSize = (int) (numCellsAllocated * kGrowthFrac_);
+    DCHECK(subMaps_[nextMapIdx].load(std::memory_order_relaxed) ==
+      (SubMap*)kLockedPtr_);
+    // create a new map using the settings stored in the first map
+
+    Config config;
+    config.emptyKey = primarySubMap->kEmptyKey_;
+    config.lockedKey = primarySubMap->kLockedKey_;
+    config.erasedKey = primarySubMap->kErasedKey_;
+    config.maxLoadFactor = primarySubMap->maxLoadFactor();
+    config.entryCountThreadCacheSize =
+      primarySubMap->getEntryCountThreadCacheSize();
+    subMaps_[nextMapIdx].store(SubMap::create(newSize, config).release(),
+      std::memory_order_relaxed);
+
+    // Publish the map to other threads.
+    numMapsAllocated_.fetch_add(1, std::memory_order_release);
+    DCHECK_EQ(nextMapIdx + 1,
+      numMapsAllocated_.load(std::memory_order_relaxed));
+  } else {
+    // If we lost the race, we'll have to wait for the next map to get
+    // allocated before doing any insertion here.
+    FOLLY_SPIN_WAIT(
+      nextMapIdx >= numMapsAllocated_.load(std::memory_order_acquire)
+    );
+  }
+
+  // Relaxed is ok here because either we just created this map, or we
+  // just did a spin wait with an acquire load on numMapsAllocated_.
+  SubMap* loadedMap = subMaps_[nextMapIdx].load(std::memory_order_relaxed);
+  DCHECK(loadedMap && loadedMap != (SubMap*)kLockedPtr_);
+  ret = loadedMap->insertInternal(r);
+  if (ret.idx != loadedMap->capacity_) {
+    return SimpleRetT(nextMapIdx, ret.idx, ret.success);
+  }
+  // We took way too long and the new map is already full...try again from
+  // the top (this should pretty much never happen).
+  goto beginInsertInternal;
+}
+
+// find --
+template <typename KeyT, typename ValueT, typename HashFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn>::iterator
+AtomicHashMap<KeyT, ValueT, HashFcn>::
+find(KeyT k) {
+  SimpleRetT ret = findInternal(k);
+  if (ret.i >= numMapsAllocated_.load(std::memory_order_acquire)) {
+    return end();
+  }
+  SubMap* subMap = subMaps_[ret.i].load(std::memory_order_relaxed);
+  return iterator(this, ret.i, subMap->makeIter(ret.j));
+}
+
+template <typename KeyT, typename ValueT, typename HashFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn>::const_iterator
+AtomicHashMap<KeyT, ValueT, HashFcn>::
+find(KeyT k) const {
+  return const_cast<AtomicHashMap*>(this)->find(k);
+}
+
+// findInternal --
+template <typename KeyT, typename ValueT, typename HashFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn>::SimpleRetT
+AtomicHashMap<KeyT, ValueT, HashFcn>::
+findInternal(const KeyT k) const {
+  SubMap* const primaryMap = subMaps_[0].load(std::memory_order_relaxed);
+  typename SubMap::SimpleRetT ret = primaryMap->findInternal(k);
+  if (LIKELY(ret.idx != primaryMap->capacity_)) {
+    return SimpleRetT(0, ret.idx, ret.success);
+  }
+  int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
+  FOR_EACH_RANGE(i, 1, numMaps) {
+    // Check each map successively.  If one succeeds, we're done!
+    SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
+    ret = thisMap->findInternal(k);
+    if (LIKELY(ret.idx != thisMap->capacity_)) {
+      return SimpleRetT(i, ret.idx, ret.success);
+    }
+  }
+  // Didn't find our key...
+  return SimpleRetT(numMaps, 0, false);
+}
+
+// findAtInternal -- see encodeIndex() for details.
+template <typename KeyT, typename ValueT, typename HashFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn>::SimpleRetT
+AtomicHashMap<KeyT, ValueT, HashFcn>::
+findAtInternal(uint32_t idx) const {
+  uint32_t subMapIdx, subMapOffset;
+  if (idx & kSecondaryMapBit_) {
+    // idx falls in a secondary map
+    idx &= ~kSecondaryMapBit_;  // unset secondary bit
+    subMapIdx = idx >> kSubMapIndexShift_;
+    DCHECK_LT(subMapIdx, numMapsAllocated_.load(std::memory_order_relaxed));
+    subMapOffset = idx & kSubMapIndexMask_;
+  } else {
+    // idx falls in primary map
+    subMapIdx = 0;
+    subMapOffset = idx;
+  }
+  return SimpleRetT(subMapIdx, subMapOffset, true);
+}
+
+// erase --
+template <typename KeyT, typename ValueT, typename HashFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn>::size_type
+AtomicHashMap<KeyT, ValueT, HashFcn>::
+erase(const KeyT k) {
+  int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
+  FOR_EACH_RANGE(i, 0, numMaps) {
+    // Check each map successively.  If one succeeds, we're done!
+    if (subMaps_[i].load(std::memory_order_relaxed)->erase(k)) {
+      return 1;
+    }
+  }
+  // Didn't find our key...
+  return 0;
+}
+
+// capacity -- summation of capacities of all submaps
+template <typename KeyT, typename ValueT, typename HashFcn>
+size_t AtomicHashMap<KeyT, ValueT, HashFcn>::
+capacity() const {
+  size_t totalCap(0);
+  int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
+  FOR_EACH_RANGE(i, 0, numMaps) {
+    totalCap += subMaps_[i].load(std::memory_order_relaxed)->capacity_;
+  }
+  return totalCap;
+}
+
+// spaceRemaining --
+// number of new insertions until current submaps are all at max load
+template <typename KeyT, typename ValueT, typename HashFcn>
+size_t AtomicHashMap<KeyT, ValueT, HashFcn>::
+spaceRemaining() const {
+  size_t spaceRem(0);
+  int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
+  FOR_EACH_RANGE(i, 0, numMaps) {
+    SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
+    spaceRem += std::max<int64_t>(
+      0,
+      thisMap->maxEntries_ - thisMap->numEntries_.readFull()
+    );
+  }
+  return spaceRem;
+}
+
+// clear -- Wipes all keys and values from primary map and destroys
+// all secondary maps.  Not thread safe.
+template <typename KeyT, typename ValueT, typename HashFcn>
+void AtomicHashMap<KeyT, ValueT, HashFcn>::
+clear() {
+  subMaps_[0].load(std::memory_order_relaxed)->clear();
+  int const numMaps = numMapsAllocated_
+    .load(std::memory_order_relaxed);
+  FOR_EACH_RANGE(i, 1, numMaps) {
+    SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
+    DCHECK(thisMap);
+    SubMap::destroy(thisMap);
+    subMaps_[i].store(nullptr, std::memory_order_relaxed);
+  }
+  numMapsAllocated_.store(1, std::memory_order_relaxed);
+}
+
+// size --
+template <typename KeyT, typename ValueT, typename HashFcn>
+size_t AtomicHashMap<KeyT, ValueT, HashFcn>::
+size() const {
+  size_t totalSize(0);
+  int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
+  FOR_EACH_RANGE(i, 0, numMaps) {
+    totalSize += subMaps_[i].load(std::memory_order_relaxed)->size();
+  }
+  return totalSize;
+}
+
+// encodeIndex -- Encode the submap index and offset into return.
+// index_ret must be pre-populated with the submap offset.
+//
+// We leave index_ret untouched when referring to the primary map
+// so it can be as large as possible (31 data bits).  Max size of
+// secondary maps is limited by what can fit in the low 27 bits.
+// +// Returns the following bit-encoded data in index_ret: +// if subMap == 0 (primary map) => +// bit(s) value +// 31 0 +// 0-30 submap offset (index_ret input) +// +// if subMap > 0 (secondary maps) => +// bit(s) value +// 31 1 +// 27-30 which subMap +// 0-26 subMap offset (index_ret input) +template +inline uint32_t AtomicHashMap:: +encodeIndex(uint32_t subMap, uint32_t offset) { + DCHECK_EQ(offset & kSecondaryMapBit_, 0); // offset can't be too big + if (subMap == 0) return offset; + // Make sure subMap isn't too big + DCHECK_EQ(subMap >> kNumSubMapBits_, 0); + // Make sure subMap bits of offset are clear + DCHECK_EQ(offset & (~kSubMapIndexMask_ | kSecondaryMapBit_), 0); + + // Set high-order bits to encode which submap this index belongs to + return offset | (subMap << kSubMapIndexShift_) | kSecondaryMapBit_; +} + + +// Iterator implementation + +template +template +struct AtomicHashMap::ahm_iterator + : boost::iterator_facade, + IterVal, + boost::forward_traversal_tag> +{ + explicit ahm_iterator() : ahm_(0) {} + + // Conversion ctor for interoperability between const_iterator and + // iterator. The enable_if<> magic keeps us well-behaved for + // is_convertible<> (v. the iterator_facade documentation). + template + ahm_iterator(const ahm_iterator& o, + typename std::enable_if< + std::is_convertible::value >::type* = 0) + : ahm_(o.ahm_) + , subMap_(o.subMap_) + , subIt_(o.subIt_) + {} + + /* + * Returns the unique index that can be used for access directly + * into the data storage. + */ + uint32_t getIndex() const { + CHECK(!isEnd()); + return ahm_->encodeIndex(subMap_, subIt_.getIndex()); + } + + private: + friend class AtomicHashMap; + explicit ahm_iterator(ContT* ahm, + uint32_t subMap, + const SubIt& subIt) + : ahm_(ahm) + , subMap_(subMap) + , subIt_(subIt) + { + checkAdvanceToNextSubmap(); + } + + friend class boost::iterator_core_access; + + void increment() { + CHECK(!isEnd()); + ++subIt_; + checkAdvanceToNextSubmap(); + } + + bool equal(const ahm_iterator& other) const { + if (ahm_ != other.ahm_) { + return false; + } + + if (isEnd() || other.isEnd()) { + return isEnd() == other.isEnd(); + } + + return subMap_ == other.subMap_ && + subIt_ == other.subIt_; + } + + IterVal& dereference() const { + return *subIt_; + } + + bool isEnd() const { return ahm_ == nullptr; } + + void checkAdvanceToNextSubmap() { + if (isEnd()) { + return; + } + + SubMap* thisMap = ahm_->subMaps_[subMap_]. + load(std::memory_order_relaxed); + if (subIt_ == thisMap->end()) { + // This sub iterator is done, advance to next one + if (subMap_ + 1 < + ahm_->numMapsAllocated_.load(std::memory_order_acquire)) { + ++subMap_; + thisMap = ahm_->subMaps_[subMap_].load(std::memory_order_relaxed); + subIt_ = thisMap->begin(); + } else { + ahm_ = nullptr; + } + } + } + + private: + ContT* ahm_; + uint32_t subMap_; + SubIt subIt_; +}; // ahm_iterator + +} // namespace folly + +#undef FOLLY_SPIN_WAIT diff --git a/folly/AtomicHashMap.h b/folly/AtomicHashMap.h new file mode 100644 index 00000000..8e02e39e --- /dev/null +++ b/folly/AtomicHashMap.h @@ -0,0 +1,393 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
diff --git a/folly/AtomicHashMap.h b/folly/AtomicHashMap.h
new file mode 100644
index 00000000..8e02e39e
--- /dev/null
+++ b/folly/AtomicHashMap.h
@@ -0,0 +1,393 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * AtomicHashMap --
+ *
+ * A high performance concurrent hash map with int32 or int64 keys. Supports
+ * insert, find(key), findAt(index), erase(key), size, and more.  Memory cannot
+ * be freed or reclaimed by erase.  Can grow to a maximum of about 18 times the
+ * initial capacity, but performance degrades linearly with growth. Can also be
+ * used as an object store with unique 32-bit references directly into the
+ * internal storage (retrieved with iterator::getIndex()).
+ *
+ * Advantages:
+ *    - High performance (~2-4x tbb::concurrent_hash_map in heavily
+ *      multi-threaded environments).
+ *    - Efficient memory usage if initial capacity is not over estimated
+ *      (especially for small keys and values).
+ *    - Good fragmentation properties (only allocates in large slabs which can
+ *      be reused with clear() and never move).
+ *    - Can generate unique, long-lived 32-bit references for efficient lookup
+ *      (see findAt()).
+ *
+ * Disadvantages:
+ *    - Keys must be native int32 or int64, or explicitly converted.
+ *    - Must be able to specify unique empty, locked, and erased keys
+ *    - Performance degrades linearly as size grows beyond initialization
+ *      capacity.
+ *    - Max size limit of ~18x initial size (dependent on max load factor).
+ *    - Memory is not freed or reclaimed by erase.
+ *
+ * Usage and Operation Details:
+ *   Simple performance/memory tradeoff with maxLoadFactor.  Higher load
+ *   factors give better memory utilization but probe lengths increase,
+ *   reducing performance.
+ *
+ * Implementation and Performance Details:
+ *   AHArray is a fixed size contiguous block of value_type cells.  When
+ *   writing a cell, the key is locked while the rest of the record is
+ *   written.  Once done, the cell is unlocked by setting the key.  find()
+ *   is completely wait-free and doesn't require any non-relaxed atomic
+ *   operations.  AHA cannot grow beyond initialization capacity, but is
+ *   faster because of reduced data indirection.
+ *
+ *   AHMap is a wrapper around AHArray sub-maps that allows growth and provides
+ *   an interface closer to the stl UnorderedAssociativeContainer concept.
+ *   These sub-maps are allocated on the fly and are processed in series, so
+ *   the more there are (from growing past initial capacity), the worse the
+ *   performance.
+ *
+ *   Insert returns false if there is a key collision and throws if the max
+ *   size of the map is exceeded.
+ *
+ *   Benchmark performance with 8 simultaneous threads processing 1 million
+ *   unique <int64, int64> entries on a 4-core, 2.5 GHz machine:
+ *
+ *     Load Factor   Mem Efficiency   usec/Insert   usec/Find
+ *         50%             50%           0.19          0.05
+ *         85%             85%           0.20          0.06
+ *         90%             90%           0.23          0.08
+ *         95%             95%           0.27          0.10
+ *
+ * See folly/tests/AtomicHashMapTest.cpp for more benchmarks.
+ *
+ * @author Spencer Ahrens
+ * @author Jordan DeLong
+ *
+ */
+
+#ifndef FOLLY_ATOMICHASHMAP_H_
+#define FOLLY_ATOMICHASHMAP_H_
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "folly/AtomicHashArray.h"
+#include "folly/Foreach.h"
+#include "folly/Hash.h"
+#include "folly/Likely.h"
+#include "folly/ThreadCachedInt.h"
+
+namespace folly {
+
+/*
+ * AtomicHashMap provides an interface somewhat similar to the
+ * UnorderedAssociativeContainer concept in C++.  It does not exactly
+ * match that concept (or even the basic Container concept) because of
+ * some restrictions imposed by our data structure.
+ *
+ * Specific differences (there are quite a few):
+ *
+ * - Efficiently thread-safe for inserts (the main point of this
+ *   class), wait-free for lookups.
+ *
+ * - You can erase from this container, but the cell containing the key
+ *   will not be freed or reclaimed.
+ *
+ * - You can erase everything by calling clear() (and you must
+ *   guarantee that only one thread is using the container when you do
+ *   that).
+ *
+ * - We aren't DefaultConstructible, CopyConstructible, Assignable, or
+ *   EqualityComparable.  (Most of these are probably not something
+ *   you actually want to do with this anyway.)
+ *
+ * - We don't support the various bucket functions, rehash(),
+ *   reserve(), or equal_range().  Also no constructors taking
+ *   iterators, although this could change.
+ *
+ * - Several insertion functions, notably operator[], are not
+ *   implemented.  It is a little too easy to misuse these functions
+ *   with this container, where part of the point is that when an
+ *   insertion happens for a new key, it will atomically have the
+ *   desired value.
+ *
+ * - The map has no templated insert() taking an iterator range, but
+ *   we do provide an insert(key, value).  The latter seems more
+ *   frequently useful for this container (to avoid sprinkling
+ *   make_pair everywhere), and providing both can lead to some gross
+ *   template error messages.
+ *
+ * - Not Allocator-aware.
+ *
+ * - KeyT must be a 32-bit or 64-bit atomic integer type, and you must
+ *   define special 'locked' and 'empty' key values in the ctor.
+ *
+ * - We don't take the Hash function object as an instance in the
+ *   constructor.
+ *
+ * - We don't take a Compare template parameter (since our keys must
+ *   be integers, and the underlying hash array here uses atomic
+ *   compare-and-swap instructions, we only allow normal integer
+ *   comparisons).
+ */
+
+// Thrown when insertion fails due to running out of space for
+// submaps.
+struct AtomicHashMapFullError : std::runtime_error {
+  explicit AtomicHashMapFullError()
+    : std::runtime_error("AtomicHashMap is full")
+  {}
+};
+
+template <class KeyT, class ValueT, class HashFcn>
+class AtomicHashMap : boost::noncopyable {
+  typedef AtomicHashArray<KeyT, ValueT, HashFcn> SubMap;
+
+ public:
+  typedef KeyT                key_type;
+  typedef ValueT              mapped_type;
+  typedef std::pair<const KeyT, ValueT> value_type;
+  typedef HashFcn             hasher;
+  typedef std::equal_to<KeyT> key_equal;
+  typedef value_type*         pointer;
+  typedef value_type&         reference;
+  typedef const value_type&   const_reference;
+  typedef std::ptrdiff_t      difference_type;
+  typedef std::size_t         size_type;
+  typedef typename SubMap::Config Config;
+
+  template <class ContT, class IterVal, class SubIt>
+  struct ahm_iterator;
+
+  typedef ahm_iterator<const AtomicHashMap,
+                       const value_type,
+                       typename SubMap::const_iterator>
+    const_iterator;
+  typedef ahm_iterator<AtomicHashMap,
+                       value_type,
+                       typename SubMap::iterator>
+    iterator;
+
+ public:
+  const float kGrowthFrac_;  // How much to grow when we run out of capacity.
+
+  // The constructor takes a finalSizeEst which is the optimal
+  // number of elements to maximize space utilization and performance,
+  // and a Config object to specify more advanced options.
+  static const Config defaultConfig;
+  explicit AtomicHashMap(size_t finalSizeEst, const Config& = defaultConfig);
+
+  ~AtomicHashMap() {
+    const int numMaps = numMapsAllocated_.load(std::memory_order_relaxed);
+    FOR_EACH_RANGE (i, 0, numMaps) {
+      SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
+      DCHECK(thisMap);
+      SubMap::destroy(thisMap);
+    }
+  }
+
+  key_equal key_eq() const { return key_equal(); }
+  hasher hash_function() const { return hasher(); }
+
+  // TODO: emplace() support would be nice.
+
+  /*
+   * insert --
+   *
+   *   Returns a pair with an iterator to the element at r.first and a
+   *   bool indicating success.  Retrieve the index with
+   *   ret.first.getIndex().
+   *
+   *   Does not overwrite on key collision, but returns an iterator to
+   *   the existing element (since this could be due to a race with
+   *   another thread, it is often important to check this return
+   *   value).
+   *
+   *   Allocates new sub maps as the existing ones become full.  If
+   *   all sub maps are full, no element is inserted, and
+   *   AtomicHashMapFullError is thrown.
+   */
+  std::pair<iterator, bool> insert(const value_type& r);
+  std::pair<iterator, bool> insert(key_type k, const mapped_type& v) {
+    return insert(value_type(k, v));
+  }
+
+  /*
+   * find --
+   *
+   *   Returns an iterator into the map.
+   *
+   *   If the key is not found, returns end().
+   */
+  iterator find(key_type k);
+  const_iterator find(key_type k) const;
+
+  /*
+   * erase --
+   *
+   *   Erases key k from the map
+   *
+   *   Returns 1 iff the key is found and erased, and 0 otherwise.
+   */
+  size_type erase(key_type k);
+
+  /*
+   * clear --
+   *
+   *   Wipes all keys and values from primary map and destroys all secondary
+   *   maps.  Primary map remains allocated and thus the memory can be reused
+   *   in place.  Not thread safe.
+   *
+   */
+  void clear();
+
+  /*
+   * size --
+   *
+   *  Returns the exact size of the map.  Note this is not as cheap as typical
+   *  size() implementations because, for each AtomicHashArray in this AHM, we
+   *  need to grab a lock and accumulate the values from all the thread local
+   *  counters.  See folly/ThreadCachedInt.h for more details.
+   */
+  size_t size() const;
+
+  bool empty() const { return size() == 0; }
+
+  size_type count(key_type k) const {
+    return find(k) == end() ? 0 : 1;
+  }
+
+
+  /*
+   * findAt --
+   *
+   *   Returns an iterator into the map.
+   *
+   *   idx should only be an unmodified value returned by calling getIndex() on
+   *   a valid iterator returned by find() or insert().  If idx is invalid you
+   *   have a bug and the process aborts.
+   */
+  iterator findAt(uint32_t idx) {
+    SimpleRetT ret = findAtInternal(idx);
+    DCHECK_LT(ret.i, numSubMaps());
+    return iterator(this, ret.i,
+      subMaps_[ret.i].load(std::memory_order_relaxed)->makeIter(ret.j));
+  }
+  const_iterator findAt(uint32_t idx) const {
+    return const_cast<AtomicHashMap*>(this)->findAt(idx);
+  }
+
+  // Total capacity - summation of capacities of all submaps.
+  size_t capacity() const;
+
+  // Number of new insertions until current submaps are all at max load factor.
+  size_t spaceRemaining() const;
+
+  void setEntryCountThreadCacheSize(int32_t newSize) {
+    const int numMaps = numMapsAllocated_.load(std::memory_order_acquire);
+    for (int i = 0; i < numMaps; ++i) {
+      SubMap* map = subMaps_[i].load(std::memory_order_relaxed);
+      map->setEntryCountThreadCacheSize(newSize);
+    }
+  }
+
+  // Number of sub maps allocated so far to implement this map.
The more there + // are, the worse the performance. + int numSubMaps() const { + return numMapsAllocated_.load(std::memory_order_acquire); + } + + iterator begin() { + return iterator(this, 0, + subMaps_[0].load(std::memory_order_relaxed)->begin()); + } + + iterator end() { + return iterator(); + } + + const_iterator begin() const { + return const_iterator(this, 0, + subMaps_[0].load(std::memory_order_relaxed)->begin()); + } + + const_iterator end() const { + return const_iterator(); + } + + /* Advanced functions for direct access: */ + + inline uint32_t recToIdx(const value_type& r, bool mayInsert = true) { + SimpleRetT ret = mayInsert ? insertInternal(r) : findInternal(r.first); + return encodeIndex(ret.i, ret.j); + } + + inline uint32_t keyToIdx(const KeyT k, bool mayInsert = false) { + return recToIdx(value_type(k), mayInsert); + } + + inline const value_type& idxToRec(uint32_t idx) const { + SimpleRetT ret = findAtInternal(idx); + return subMaps_[ret.i].load(std::memory_order_relaxed)->idxToRec(ret.j); + } + + /* Private data and helper functions... */ + + private: + // This limits primary submap size to 2^31 ~= 2 billion, secondary submap + // size to 2^(32 - kNumSubMapBits_ - 1) = 2^27 ~= 130 million, and num subMaps + // to 2^kNumSubMapBits_ = 16. + static const uint32_t kNumSubMapBits_ = 4; + static const uint32_t kSecondaryMapBit_ = 1u << 31; // Highest bit + static const uint32_t kSubMapIndexShift_ = 32 - kNumSubMapBits_ - 1; + static const uint32_t kSubMapIndexMask_ = (1 << kSubMapIndexShift_) - 1; + static const uint32_t kNumSubMaps_ = 1 << kNumSubMapBits_; + static const uintptr_t kLockedPtr_ = 0x88ul << 48; // invalid pointer + + struct SimpleRetT { uint32_t i; size_t j; bool success; + SimpleRetT(uint32_t ii, size_t jj, bool s) : i(ii), j(jj), success(s) {} + SimpleRetT() {} + }; + + SimpleRetT insertInternal(const value_type& r); + + SimpleRetT findInternal(const KeyT k) const; + + SimpleRetT findAtInternal(const uint32_t idx) const; + + std::atomic subMaps_[kNumSubMaps_]; + std::atomic numMapsAllocated_; + + inline bool tryLockMap(int idx) { + SubMap* val = nullptr; + return subMaps_[idx].compare_exchange_strong(val, (SubMap*)kLockedPtr_, + std::memory_order_acquire); + } + + static inline uint32_t encodeIndex(uint32_t subMap, uint32_t subMapIdx); + +}; // AtomicHashMap + +} // namespace folly + +#include "AtomicHashMap-inl.h" + +#endif // FOLLY_ATOMICHASHMAP_H_ diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp new file mode 100644 index 00000000..93aa2efe --- /dev/null +++ b/folly/Benchmark.cpp @@ -0,0 +1,393 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) + +#include "Benchmark.h" +#include "Foreach.h" +#include "String.h" +#include +#include +#include +#include +#include +#include + +using namespace std; + +DEFINE_bool(benchmark, false, "Run benchmarks."); + +namespace folly { + +BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent; + +typedef function BenchmarkFun; +static vector> benchmarks; + +// Add the global baseline +BENCHMARK(globalBenchmarkBaseline) { + asm volatile(""); +} + +void detail::addBenchmarkImpl(const char* file, const char* name, + BenchmarkFun fun) { + benchmarks.emplace_back(file, name, std::move(fun)); +} + +/** + * Given a point, gives density at that point as a number 0.0 < x <= + * 1.0. The result is 1.0 if all samples are equal to where, and + * decreases near 0 if all points are far away from it. The density is + * computed with the help of a radial basis function. + */ +static double density(const double * begin, const double *const end, + const double where, const double bandwidth) { + assert(begin < end); + assert(bandwidth > 0.0); + double sum = 0.0; + FOR_EACH_RANGE (i, begin, end) { + auto d = (*i - where) / bandwidth; + sum += exp(- d * d); + } + return sum / (end - begin); +} + +/** + * Computes mean and variance for a bunch of data points. Note that + * mean is currently not being used. + */ +static pair +meanVariance(const double * begin, const double *const end) { + assert(begin < end); + double sum = 0.0, sum2 = 0.0; + FOR_EACH_RANGE (i, begin, end) { + sum += *i; + sum2 += *i * *i; + } + auto const n = end - begin; + return make_pair(sum / n, sqrt((sum2 - sum * sum / n) / n)); +} + +/** + * Computes the mode of a sample set through brute force. Assumes + * input is sorted. + */ +static double mode(const double * begin, const double *const end) { + assert(begin < end); + // Lower bound and upper bound for result and their respective + // densities. + auto + result = 0.0, + bestDensity = 0.0; + + // Get the variance so we pass it down to density() + auto const sigma = meanVariance(begin, end).second; + if (!sigma) { + // No variance means constant signal + return *begin; + } + + FOR_EACH_RANGE (i, begin, end) { + assert(i == begin || *i >= i[-1]); + auto candidate = density(begin, end, *i, sigma * sqrt(2.0)); + if (candidate > bestDensity) { + // Found a new best + bestDensity = candidate; + result = *i; + } else { + // Density is decreasing... we could break here if we definitely + // knew this is unimodal. + } + } + + return result; +} + +/** + * Given a bunch of benchmark samples, estimate the actual run time. + */ +static double estimateTime(double * begin, double * end) { + assert(begin < end); + + // Current state of the art: get the minimum. After some + // experimentation, it seems taking the minimum is the best. + + return *min_element(begin, end); + + // What follows after estimates the time as the mode of the + // distribution. + + // Select the awesomest (i.e. most frequent) result. We do this by + // sorting and then computing the longest run length. + sort(begin, end); + + // Eliminate outliers. A time much larger than the minimum time is + // considered an outlier. 
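+  // (Any sample greater than twice the smallest sample is discarded.)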
+  while (end[-1] > 2.0 * *begin) {
+    --end;
+    if (begin == end) {
+      LOG(INFO) << *begin;
+    }
+    assert(begin < end);
+  }
+
+  double result = 0;
+
+  /* Code used just for comparison purposes */ {
+    unsigned bestFrequency = 0;
+    unsigned candidateFrequency = 1;
+    double candidateValue = *begin;
+    for (auto current = begin + 1; ; ++current) {
+      if (current == end || *current != candidateValue) {
+        // Done with the current run, see if it was best
+        if (candidateFrequency > bestFrequency) {
+          bestFrequency = candidateFrequency;
+          result = candidateValue;
+        }
+        if (current == end) {
+          break;
+        }
+        // Start a new run
+        candidateValue = *current;
+        candidateFrequency = 1;
+      } else {
+        // Cool, inside a run, increase the frequency
+        ++candidateFrequency;
+      }
+    }
+  }
+
+  result = mode(begin, end);
+
+  return result;
+}
+
+static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
+                                            const double globalBaseline) {
+  // The key here is accuracy; numbers that are too low mean the
+  // accuracy was coarse. We up the ante until we get to at least
+  // minNanoseconds timings.
+  static uint64_t resolutionInNs = 0, coarseResolutionInNs = 0;
+  if (!resolutionInNs) {
+    timespec ts;
+    CHECK_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts));
+    CHECK_EQ(0, ts.tv_sec) << "Clock sucks.";
+    CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good.";
+    CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
+    resolutionInNs = ts.tv_nsec;
+  }
+  // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
+  // the clock resolution is worse than that, it will be larger. In
+  // essence we're aiming at making the quantization noise 0.001%.
+  static const auto minNanoseconds = min(resolutionInNs * 100000, 1000000000UL);
+
+  // We do measurements in several epochs and take the minimum, to
+  // account for jitter.
+  static const unsigned int epochs = 1000;
+  // We establish a total time budget as we don't want a measurement
+  // to take too long. This will curtail the number of actual epochs.
+  static const uint64_t timeBudgetInNs = 1000000000;
+  timespec global;
+  CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
+
+  double epochResults[epochs] = { 0 };
+  size_t actualEpochs = 0;
+
+  for (; actualEpochs < epochs; ++actualEpochs) {
+    for (unsigned int n = 1; n < (1U << 30); n *= 2) {
+      auto const nsecs = fun(n);
+      if (nsecs < minNanoseconds) {
+        continue;
+      }
+      // We got an accurate enough timing; record it net of the baseline.
+      epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline);
+      // Done with the current epoch, we got a meaningful timing.
+      break;
+    }
+    timespec now;
+    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &now));
+    if (detail::timespecDiff(now, global) >= timeBudgetInNs) {
+      // No more time budget available.
+      ++actualEpochs;
+      break;
+    }
+  }
+
+  // If the benchmark was basically drowned in baseline noise, it's
+  // possible it became negative.
+  return max(0.0, estimateTime(epochResults, epochResults + actualEpochs));
+}
+
+static string humanReadable(double n, unsigned int decimals) {
+  auto a = fabs(n);
+  char suffix = ' ';
+
+  if (a >= 1E21) {
+    // Too big to be comprehended by the puny human brain
+    suffix = '!';
+    n /= 1E21;
+  } else if (a >= 1E18) {
+    // "EXA" written with suffix 'X' so as to not create confusion
+    // with scientific notation.
+ suffix = 'X'; + n /= 1E18; + } else if (a >= 1E15) { + // "PETA" + suffix = 'P'; + n /= 1E15; + } else if (a >= 1E12) { + // "TERA" + suffix = 'T'; + n /= 1E12; + } else if (a >= 1E9) { + // "GIGA" + suffix = 'G'; + n /= 1E9; + } else if (a >= 1E6) { + // "MEGA" + suffix = 'M'; + n /= 1E6; + } else if (a >= 1E3) { + // "KILO" + suffix = 'K'; + n /= 1E3; + } else if (a == 0.0) { + suffix = ' '; + } else if (a < 1E-15) { + // too small + suffix = '?'; + n *= 1E18; + } else if (a < 1E-12) { + // "femto" + suffix = 'f'; + n *= 1E15; + } else if (a < 1E-9) { + // "pico" + suffix = 'p'; + n *= 1E12; + } else if (a < 1E-6) { + // "nano" + suffix = 'n'; + n *= 1E9; + } else if (a < 1E-3) { + // "micro" + suffix = 'u'; + n *= 1E6; + } else if (a < 1) { + // "mili" + suffix = 'm'; + n *= 1E3; + } + + return stringPrintf("%*.*f%c", decimals + 3 + 1, decimals, n, suffix); +} + +static void printBenchmarkResults( + const vector >& data) { + // Width available + static const uint columns = 76; + + // Compute the longest benchmark name + size_t longestName = 0; + FOR_EACH_RANGE (i, 1, benchmarks.size()) { + longestName = max(longestName, strlen(get<1>(benchmarks[i]))); + } + + // Print a horizontal rule + auto separator = [&](char pad) { + puts(string(columns, pad).c_str()); + }; + + // Print header for a file + auto header = [&](const char* file) { + separator('='); + printf("%-*srelative ns/iter iters/s\n", + columns - 26, file); + separator('='); + }; + + double baselineNsPerIter = numeric_limits::max(); + const char* lastFile = ""; + + for (auto& datum : data) { + auto file = get<0>(datum); + if (strcmp(file, lastFile)) { + // New file starting + header(file); + lastFile = file; + } + + string s = get<1>(datum); + if (s == "-") { + separator('-'); + continue; + } + bool useBaseline /* = void */; + if (s[0] == '%') { + s.erase(0, 1); + useBaseline = true; + } else { + baselineNsPerIter = get<2>(datum); + useBaseline = false; + } + s.resize(columns - 27, ' '); + auto nsPerIter = get<2>(datum); + auto itersPerSec = 1E9 / nsPerIter; + if (!useBaseline) { + // Print without baseline + printf("%*s %s %s\n", + static_cast(s.size()), s.c_str(), + humanReadable(nsPerIter, 2).c_str(), + humanReadable(itersPerSec, 2).c_str()); + } else { + // Print with baseline + auto rel = baselineNsPerIter / nsPerIter * 100.0; + printf("%*s %7.2f%% %s %s\n", + static_cast(s.size()), s.c_str(), + rel, + humanReadable(nsPerIter, 2).c_str(), + humanReadable(itersPerSec, 2).c_str()); + } + } + separator('='); +} + +void runBenchmarks() { + CHECK(!benchmarks.empty()); + + vector> results; + results.reserve(benchmarks.size() - 1); + + // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS. + + auto const globalBaseline = runBenchmarkGetNSPerIteration( + get<2>(benchmarks.front()), 0); + FOR_EACH_RANGE (i, 1, benchmarks.size()) { + auto elapsed = strcmp(get<1>(benchmarks[i]), "-") == 0 + ? 0.0 // skip the separators + : runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]), + globalBaseline); + results.emplace_back(get<0>(benchmarks[i]), + get<1>(benchmarks[i]), elapsed); + } + + // PLEASE MAKE NOISE. MEASUREMENTS DONE. + + printBenchmarkResults(results); +} + +} // namespace folly diff --git a/folly/Benchmark.h b/folly/Benchmark.h new file mode 100644 index 00000000..32950e0f --- /dev/null +++ b/folly/Benchmark.h @@ -0,0 +1,375 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_BENCHMARK_H_ +#define FOLLY_BENCHMARK_H_ + +#include "folly/Preprocessor.h" // for FB_ANONYMOUS_VARIABLE +#include +#include +#include +#include +#include +#include +#include + +DECLARE_bool(benchmark); + +namespace folly { + +/** + * Runs all benchmarks defined. Usually put in main(). + */ +void runBenchmarks(); + +/** + * Runs all benchmarks defined if and only if the --benchmark flag has + * been passed to the program. Usually put in main(). + */ +inline bool runBenchmarksOnFlag() { + if (FLAGS_benchmark) { + runBenchmarks(); + } + return FLAGS_benchmark; +} + +namespace detail { + +/** + * This is the clock ID used for measuring time. On older kernels, the + * resolution of this clock will be very coarse, which will cause the + * benchmarks to fail. + */ +enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME }; + +/** + * Adds a benchmark wrapped in a std::function. Only used + * internally. Pass by value is intentional. + */ +void addBenchmarkImpl(const char* file, + const char* name, + std::function); + +/** + * Takes the difference between two timespec values. end is assumed to + * occur after start. + */ +inline uint64_t timespecDiff(timespec end, timespec start) { + if (end.tv_sec == start.tv_sec) { + assert(end.tv_nsec >= start.tv_nsec); + return end.tv_nsec - start.tv_nsec; + } + assert(end.tv_sec > start.tv_sec && + end.tv_sec - start.tv_sec < + std::numeric_limits::max() / 1000000000UL); + return (end.tv_sec - start.tv_sec) * 1000000000UL + + end.tv_nsec - start.tv_nsec; +} + +/** + * Takes the difference between two sets of timespec values. The first + * two come from a high-resolution clock whereas the other two come + * from a low-resolution clock. The crux of the matter is that + * high-res values may be bogus as documented in + * http://linux.die.net/man/3/clock_gettime. The trouble is when the + * running process migrates from one CPU to another, which is more + * likely for long-running processes. Therefore we watch for high + * differences between the two timings. + * + * This function is subject to further improvements. + */ +inline uint64_t timespecDiff(timespec end, timespec start, + timespec endCoarse, timespec startCoarse) { + auto fine = timespecDiff(end, start); + auto coarse = timespecDiff(endCoarse, startCoarse); + if (coarse - fine >= 1000000) { + // The fine time is in all likelihood bogus + return coarse; + } + return fine; +} + +} // namespace detail + +/** + * Supporting type for BENCHMARK_SUSPEND defined below. 
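+ *
+ * While a BenchmarkSuspender object is alive, elapsed time is tallied
+ * into nsSpent (on destruction or dismiss()) and later subtracted from
+ * the measurement of the enclosing benchmark.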
+ */ +struct BenchmarkSuspender { + BenchmarkSuspender() { + CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start)); + } + + BenchmarkSuspender(const BenchmarkSuspender &) = delete; + BenchmarkSuspender(BenchmarkSuspender && rhs) { + start = rhs.start; + rhs.start.tv_nsec = rhs.start.tv_sec = 0; + } + + BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete; + BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) { + if (start.tv_nsec > 0 || start.tv_sec > 0) { + tally(); + } + start = rhs.start; + rhs.start.tv_nsec = rhs.start.tv_sec = 0; + return *this; + } + + ~BenchmarkSuspender() { + if (start.tv_nsec > 0 || start.tv_sec > 0) { + tally(); + } + } + + void dismiss() { + assert(start.tv_nsec > 0 || start.tv_sec > 0); + tally(); + start.tv_nsec = start.tv_sec = 0; + } + + void rehire() { + assert(start.tv_nsec == 0 || start.tv_sec == 0); + CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start)); + } + + /** + * This helps the macro definition. To get around the dangers of + * operator bool, returns a pointer to member (which allows no + * arithmetic). + */ + operator int BenchmarkSuspender::*() const { + return nullptr; + } + + /** + * Accumulates nanoseconds spent outside benchmark. + */ + typedef uint64_t NanosecondsSpent; + static NanosecondsSpent nsSpent; + +private: + void tally() { + timespec end; + CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end)); + nsSpent += detail::timespecDiff(end, start); + start = end; + } + + timespec start; +}; + +/** + * Adds a benchmark. Usually not called directly but instead through + * the macro BENCHMARK defined below. The lambda function involved + * must take exactly one parameter of type unsigned, and the benchmark + * uses it with counter semantics (iteration occurs inside the + * function). + */ +template +typename std::enable_if< + boost::function_types::function_arity::value + == 2 +>::type +addBenchmark(const char* file, const char* name, Lambda&& lambda) { + auto execute = [=](unsigned int times) { + BenchmarkSuspender::nsSpent = 0; + timespec start, end; + + // CORE MEASUREMENT STARTS + CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start)); + lambda(times); + CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end)); + // CORE MEASUREMENT ENDS + + return detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent; + }; + + detail::addBenchmarkImpl(file, name, + std::function(execute)); +} + +/** + * Adds a benchmark. Usually not called directly but instead through + * the macro BENCHMARK defined below. The lambda function involved + * must take zero parameters, and the benchmark calls it repeatedly + * (iteration occurs outside the function). + */ +template +typename std::enable_if< + boost::function_types::function_arity::value + == 1 +>::type +addBenchmark(const char* file, const char* name, Lambda&& lambda) { + addBenchmark(file, name, [=](unsigned int times) { + while (times-- > 0) { + lambda(); + } + }); +} + +/** + * Call doNotOptimizeAway(var) against variables that you use for + * benchmarking but otherwise are useless. The compiler tends to do a + * good job at eliminating unused variables, and this function fools + * it into thinking var is in fact needed. + */ +template +void doNotOptimizeAway(T&& datum) { + asm volatile("" : "+r" (datum)); +} + +} // namespace folly + +/** + * Introduces a benchmark function. Used internally, see BENCHMARK and + * friends below. 
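+ *
+ * (For example, BENCHMARK(foo, n) declares static void foo(unsigned n)
+ * and registers it under the name "foo" via addBenchmark() above.)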
+ */
+#define BENCHMARK_IMPL(funName, stringName, paramType, paramName)      \
+  static void funName(paramType);                                      \
+  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (          \
+    ::folly::addBenchmark(__FILE__, stringName,                        \
+      [](paramType paramName) { funName(paramName); }),                \
+    true);                                                             \
+  static void funName(paramType paramName)
+
+/**
+ * Introduces a benchmark function. Use with either one or two
+ * arguments. The first is the name of the benchmark. Use something
+ * descriptive, such as insertVectorBegin. The second argument may be
+ * missing, or could be a symbolic counter. The counter dictates how
+ * many internal iterations the benchmark does. Example:
+ *
+ * BENCHMARK(vectorPushBack) {
+ *   vector<int> v;
+ *   v.push_back(42);
+ * }
+ *
+ * BENCHMARK(insertVectorBegin, n) {
+ *   vector<int> v;
+ *   FOR_EACH_RANGE (i, 0, n) {
+ *     v.insert(v.begin(), 42);
+ *   }
+ * }
+ */
+#define BENCHMARK(name, ...)                                    \
+  BENCHMARK_IMPL(                                               \
+    name,                                                       \
+    FB_STRINGIZE(name),                                         \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
+    __VA_ARGS__)
+
+/**
+ * Defines a benchmark that passes a parameter to another one. This is
+ * common for benchmarks that need a "problem size" in addition to
+ * "number of iterations". Consider:
+ *
+ * void pushBack(uint n, size_t initialSize) {
+ *   vector<int> v;
+ *   BENCHMARK_SUSPEND {
+ *     v.resize(initialSize);
+ *   }
+ *   FOR_EACH_RANGE (i, 0, n) {
+ *    v.push_back(i);
+ *   }
+ * }
+ * BENCHMARK_PARAM(pushBack, 0)
+ * BENCHMARK_PARAM(pushBack, 1000)
+ * BENCHMARK_PARAM(pushBack, 1000000)
+ *
+ * The benchmark above estimates the speed of push_back at different
+ * initial sizes of the vector. The framework will pass 0, 1000, and
+ * 1000000 for initialSize, and the iteration count for n.
+ */
+#define BENCHMARK_PARAM(name, param)                            \
+  BENCHMARK_IMPL(                                               \
+    FB_CONCATENATE(name, FB_CONCATENATE(_, param)),             \
+    FB_STRINGIZE(name) "(" FB_STRINGIZE(param) ")",             \
+    unsigned,                                                   \
+    iters) {                                                    \
+    name(iters, param);                                         \
+  }
+
+/**
+ * Just like BENCHMARK, but prints the time relative to a
+ * baseline. The baseline is the most recent BENCHMARK() seen in
+ * lexical order. Example:
+ *
+ * // This is the baseline
+ * BENCHMARK(insertVectorBegin, n) {
+ *   vector<int> v;
+ *   FOR_EACH_RANGE (i, 0, n) {
+ *     v.insert(v.begin(), 42);
+ *   }
+ * }
+ *
+ * BENCHMARK_RELATIVE(insertListBegin, n) {
+ *   list<int> s;
+ *   FOR_EACH_RANGE (i, 0, n) {
+ *     s.insert(s.begin(), 42);
+ *   }
+ * }
+ *
+ * Any number of relative benchmarks can be associated with a
+ * baseline. Another BENCHMARK() occurrence effectively establishes a
+ * new baseline.
+ */
+#define BENCHMARK_RELATIVE(name, ...)                           \
+  BENCHMARK_IMPL(                                               \
+    name,                                                       \
+    "%" FB_STRINGIZE(name),                                     \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
+    __VA_ARGS__)
+
+/**
+ * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM.
+ */
+#define BENCHMARK_RELATIVE_PARAM(name, param)                   \
+  BENCHMARK_IMPL(                                               \
+    FB_CONCATENATE(name, FB_CONCATENATE(_, param)),             \
+    "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param) ")",         \
+    unsigned,                                                   \
+    iters) {                                                    \
+    name(iters, param);                                         \
+  }
+
+/**
+ * Draws a line of dashes.
+ */
+#define BENCHMARK_DRAW_LINE()                                   \
+  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (   \
+    ::folly::addBenchmark(__FILE__, "-", []() { }),             \
+    true);
+
+/**
+ * Allows execution of code that doesn't count toward the benchmark's
+ * time budget.
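+ * (BENCHMARK_SUSPEND expands to an if/else that keeps a
+ * BenchmarkSuspender alive for the duration of the block that follows
+ * it; see the macro definition below.)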
+ * Example:
+ *
+ * BENCHMARK(insertVectorBegin, n) {
+ *   vector<int> v;
+ *   BENCHMARK_SUSPEND {
+ *     v.reserve(n);
+ *   }
+ *   FOR_EACH_RANGE (i, 0, n) {
+ *     v.insert(v.begin(), 42);
+ *   }
+ * }
+ */
+#define BENCHMARK_SUSPEND                               \
+  if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) =   \
+      ::folly::BenchmarkSuspender()) {}                 \
+  else
+
+#endif // FOLLY_BENCHMARK_H_
diff --git a/folly/Bits.h b/folly/Bits.h
new file mode 100644
index 00000000..32f51068
--- /dev/null
+++ b/folly/Bits.h
@@ -0,0 +1,519 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Various low-level, bit-manipulation routines.
+ *
+ * findFirstSet(x)
+ *    find first (least significant) bit set in a value of an integral type,
+ *    1-based (like ffs()).  0 = no bits are set (x == 0)
+ *
+ * findLastSet(x)
+ *    find last (most significant) bit set in a value of an integral type,
+ *    1-based.  0 = no bits are set (x == 0)
+ *    for x != 0, findLastSet(x) == 1 + floor(log2(x))
+ *
+ * nextPowTwo(x)
+ *    Finds the next power of two >= x.
+ *
+ * Endian
+ *    convert between native, big, and little endian representation
+ *    Endian::big(x)      big <-> native
+ *    Endian::little(x)   little <-> native
+ *    Endian::swap(x)     big <-> little
+ *
+ * BitIterator
+ *    Wrapper around an iterator over an integral type that iterates
+ *    over its underlying bits in LSb to MSb order
+ *
+ * findFirstSet(BitIterator begin, BitIterator end)
+ *    return a BitIterator pointing to the first 1 bit in [begin, end), or
+ *    end if all bits in [begin, end) are 0
+ *
+ * @author Tudor Bosman (tudorb@fb.com)
+ */
+
+#ifndef FOLLY_BITS_H_
+#define FOLLY_BITS_H_
+
+#include "folly/Portability.h"
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
+#include "folly/detail/BitIteratorDetail.h"
+#include "folly/Likely.h"
+
+#include
+#include
+#include
+#include <strings.h>  // for ffs, ffsl, ffsll
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace folly {
+
+// Generate overloads for findFirstSet as wrappers around
+// appropriate ffs, ffsl, ffsll functions from glibc.
+// We first define these overloads for signed types (because ffs, ffsl, ffsll
+// take int, long, and long long as arguments, respectively) and then
+// define an overload for unsigned that forwards to the overload for the
+// corresponding signed type.
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   std::is_signed<T>::value &&
+   (std::numeric_limits<T>::digits <= std::numeric_limits<int>::digits)),
+  unsigned int>::type
+  findFirstSet(T x) {
+  return ::ffs(static_cast<int>(x));
+}
+
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   std::is_signed<T>::value &&
+   (std::numeric_limits<T>::digits > std::numeric_limits<int>::digits) &&
+   (std::numeric_limits<T>::digits <= std::numeric_limits<long>::digits)),
+  unsigned int>::type
+  findFirstSet(T x) {
+  return ::ffsl(static_cast<long>(x));
+}
+
+#ifdef FOLLY_HAVE_FFSLL
+
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   std::is_signed<T>::value &&
+   (std::numeric_limits<T>::digits > std::numeric_limits<long>::digits) &&
+   (std::numeric_limits<T>::digits <=
+    std::numeric_limits<long long>::digits)),
+  unsigned int>::type
+  findFirstSet(T x) {
+  return ::ffsll(static_cast<long long>(x));
+}
+
+#endif
+
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   !std::is_signed<T>::value),
+  unsigned int>::type
+  findFirstSet(T x) {
+  // Note that conversion from an unsigned type to the corresponding signed
+  // type is technically implementation-defined, but will likely work
+  // on any implementation that uses two's complement.
+  return findFirstSet(static_cast<typename std::make_signed<T>::type>(x));
+}
+
+namespace detail {
+
+// Portable, but likely slow...
+inline unsigned int findLastSetPortable(uint64_t x) {
+  unsigned int r = (x != 0);  // 1-based index, except for x==0
+  while (x >>= 1) {
+    ++r;
+  }
+  return r;
+}
+
+}  // namespace detail
+
+#ifdef __GNUC__
+
+// findLastSet: return the 1-based index of the highest bit set
+// for x > 0, findLastSet(x) == 1 + floor(log2(x))
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   std::is_unsigned<T>::value &&
+   (std::numeric_limits<T>::digits <=
+    std::numeric_limits<unsigned int>::digits)),
+  unsigned int>::type
+  findLastSet(T x) {
+  return x ? 8 * sizeof(unsigned int) - __builtin_clz(x) : 0;
+}
+
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   std::is_unsigned<T>::value &&
+   (std::numeric_limits<T>::digits >
+    std::numeric_limits<unsigned int>::digits) &&
+   (std::numeric_limits<T>::digits <=
+    std::numeric_limits<unsigned long>::digits)),
+  unsigned int>::type
+  findLastSet(T x) {
+  return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0;
+}
+
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   std::is_unsigned<T>::value &&
+   (std::numeric_limits<T>::digits >
+    std::numeric_limits<unsigned long>::digits) &&
+   (std::numeric_limits<T>::digits <=
+    std::numeric_limits<unsigned long long>::digits)),
+  unsigned int>::type
+  findLastSet(T x) {
+  return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0;
+}
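+
+// For example, findLastSet(1u) == 1 and findLastSet(5u) == 3
+// (i.e. 1 + floor(log2(5))).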
+
+#else /* !__GNUC__ */
+
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   std::is_unsigned<T>::value),
+  unsigned int>::type
+  findLastSet(T x) {
+  return detail::findLastSetPortable(x);
+}
+
+#endif
+
+template <class T>
+typename std::enable_if<
+  (std::is_integral<T>::value &&
+   std::is_signed<T>::value),
+  unsigned int>::type
+  findLastSet(T x) {
+  return findLastSet(static_cast<typename std::make_unsigned<T>::type>(x));
+}
+
+namespace detail {
+
+template <class T>
+inline
+typename std::enable_if<
+  std::is_integral<T>::value && std::is_unsigned<T>::value,
+  T>::type
+nextPowTwoPortable(T v) {
+  if (UNLIKELY(v == 0)) {
+    return 1;
+  }
+
+  --v;
+  // Smear the highest set bit into all lower positions, then add 1.
+  for (uint32_t i = 1; i < sizeof(T) * 8; i <<= 1) {
+    v |= (v >> i);
+  }
+  return v + 1;
+}
+
+}  // namespace detail
+
+#ifdef __GNUC__
+
+template <class T>
+inline
+typename std::enable_if<
+  std::is_integral<T>::value && std::is_unsigned<T>::value,
+  T>::type
+nextPowTwo(T v) {
+  if (UNLIKELY(v == 0)) {
+    return 1;
+  }
+  return 1ul << findLastSet(v - 1);
+}
+
+#else /* __GNUC__ */
+
+template <class T>
+inline
+typename std::enable_if<
+  std::is_integral<T>::value && std::is_unsigned<T>::value,
+  T>::type
+nextPowTwo(T v) {
+  return detail::nextPowTwoPortable(v);
+}
+
+#endif /* __GNUC__ */
+
+
+
+/**
+ * Endianness detection and manipulation primitives.
+ */
+namespace detail {
+
+template <class T>
+struct EndianIntBase {
+ public:
+  static T swap(T x);
+};
+
+#define FB_GEN(t, fn) \
+template<> inline t EndianIntBase<t>::swap(t x) { return fn(x); }
+
+// fn(x) expands to (x) if the second argument is empty, which is exactly
+// what we want for [u]int8_t
+FB_GEN( int8_t,)
+FB_GEN(uint8_t,)
+FB_GEN( int64_t, bswap_64)
+FB_GEN(uint64_t, bswap_64)
+FB_GEN( int32_t, bswap_32)
+FB_GEN(uint32_t, bswap_32)
+FB_GEN( int16_t, bswap_16)
+FB_GEN(uint16_t, bswap_16)
+
+#undef FB_GEN
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+template <class T>
+struct EndianInt : public detail::EndianIntBase<T> {
+ public:
+  static T big(T x) { return EndianInt::swap(x); }
+  static T little(T x) { return x; }
+};
+
+#elif __BYTE_ORDER == __BIG_ENDIAN
+
+template <class T>
+struct EndianInt : public detail::EndianIntBase<T> {
+ public:
+  static T big(T x) { return x; }
+  static T little(T x) { return EndianInt::swap(x); }
+};
+
+#else
+# error Your machine uses a weird endianness!
+#endif /* __BYTE_ORDER */
+
+}  // namespace detail
+
+// big* convert between native and big-endian representations
+// little* convert between native and little-endian representations
+// swap* convert between big-endian and little-endian representations
+//
+// ntohs, htons == big16
+// ntohl, htonl == big32
+#define FB_GEN1(fn, t, sz) \
+  static t fn##sz(t x) { return fn(x); } \
+
+#define FB_GEN2(t, sz) \
+  FB_GEN1(swap, t, sz) \
+  FB_GEN1(big, t, sz) \
+  FB_GEN1(little, t, sz)
+
+#define FB_GEN(sz) \
+  FB_GEN2(uint##sz##_t, sz) \
+  FB_GEN2(int##sz##_t, sz)
+
+class Endian {
+ public:
+  template <class T> static T swap(T x) {
+    return detail::EndianInt<T>::swap(x);
+  }
+  template <class T> static T big(T x) {
+    return detail::EndianInt<T>::big(x);
+  }
+  template <class T> static T little(T x) {
+    return detail::EndianInt<T>::little(x);
+  }
+
+  FB_GEN(64)
+  FB_GEN(32)
+  FB_GEN(16)
+  FB_GEN(8)
+};
+
+#undef FB_GEN
+#undef FB_GEN2
+#undef FB_GEN1
+
+/**
+ * Fast bit iteration facility.
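+ *
+ * A quick sketch (using makeBitIterator and the range findFirstSet
+ * defined below):
+ *
+ *   std::vector<uint32_t> v = {0, 8};
+ *   auto it = findFirstSet(makeBitIterator(v.begin()),
+ *                          makeBitIterator(v.end()));
+ *   // it now points at bit 3 of v[1] (the 36th bit overall)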
+ */ + + +template class BitIterator; +template +BitIterator findFirstSet(BitIterator, + BitIterator); +/** + * Wrapper around an iterator over an integer type that iterates + * over its underlying bits in LSb to MSb order. + * + * BitIterator models the same iterator concepts as the base iterator. + */ +template +class BitIterator + : public bititerator_detail::BitIteratorBase::type { + public: + /** + * Return the number of bits in an element of the underlying iterator. + */ + static size_t bitsPerBlock() { + return std::numeric_limits< + typename std::make_unsigned< + typename std::iterator_traits::value_type + >::type + >::digits; + } + + /** + * Construct a BitIterator that points at a given bit offset (default 0) + * in iter. + */ + explicit BitIterator(const BaseIter& iter, size_t bitOffset=0) + : bititerator_detail::BitIteratorBase::type(iter), + bitOffset_(bitOffset) { + assert(bitOffset_ < bitsPerBlock()); + } + + size_t bitOffset() const { + return bitOffset_; + } + + void advanceToNextBlock() { + bitOffset_ = 0; + ++this->base_reference(); + } + + BitIterator& operator=(const BaseIter& other) { + this->~BitIterator(); + new (this) BitIterator(other); + return *this; + } + + private: + friend class boost::iterator_core_access; + friend BitIterator findFirstSet<>(BitIterator, BitIterator); + + typedef bititerator_detail::BitReference< + typename std::iterator_traits::reference, + typename std::iterator_traits::value_type + > BitRef; + + void advanceInBlock(size_t n) { + bitOffset_ += n; + assert(bitOffset_ < bitsPerBlock()); + } + + BitRef dereference() const { + return BitRef(*this->base_reference(), bitOffset_); + } + + void advance(ssize_t n) { + size_t bpb = bitsPerBlock(); + ssize_t blocks = n / bpb; + bitOffset_ += n % bpb; + if (bitOffset_ >= bpb) { + bitOffset_ -= bpb; + ++blocks; + } + this->base_reference() += blocks; + } + + void increment() { + if (++bitOffset_ == bitsPerBlock()) { + advanceToNextBlock(); + } + } + + void decrement() { + if (bitOffset_-- == 0) { + bitOffset_ = bitsPerBlock() - 1; + --this->base_reference(); + } + } + + bool equal(const BitIterator& other) const { + return (bitOffset_ == other.bitOffset_ && + this->base_reference() == other.base_reference()); + } + + ssize_t distance_to(const BitIterator& other) const { + return + (other.base_reference() - this->base_reference()) * bitsPerBlock() + + (other.bitOffset_ - bitOffset_); + } + + ssize_t bitOffset_; +}; + +/** + * Helper function, so you can write + * auto bi = makeBitIterator(container.begin()); + */ +template +BitIterator makeBitIterator(const BaseIter& iter) { + return BitIterator(iter); +} + + +/** + * Find first bit set in a range of bit iterators. 
+ * 4.5x faster than the obvious std::find(begin, end, true); + */ +template +BitIterator findFirstSet(BitIterator begin, + BitIterator end) { + // shortcut to avoid ugly static_cast<> + static const typename BaseIter::value_type one = 1; + + while (begin.base() != end.base()) { + typename BaseIter::value_type v = *begin.base(); + // mask out the bits that don't matter (< begin.bitOffset) + v &= ~((one << begin.bitOffset()) - 1); + size_t firstSet = findFirstSet(v); + if (firstSet) { + --firstSet; // now it's 0-based + assert(firstSet >= begin.bitOffset()); + begin.advanceInBlock(firstSet - begin.bitOffset()); + return begin; + } + begin.advanceToNextBlock(); + } + + // now begin points to the same block as end + if (end.bitOffset() != 0) { // assume end is dereferenceable + typename BaseIter::value_type v = *begin.base(); + // mask out the bits that don't matter (< begin.bitOffset) + v &= ~((one << begin.bitOffset()) - 1); + // mask out the bits that don't matter (>= end.bitOffset) + v &= (one << end.bitOffset()) - 1; + size_t firstSet = findFirstSet(v); + if (firstSet) { + --firstSet; // now it's 0-based + assert(firstSet >= begin.bitOffset()); + begin.advanceInBlock(firstSet - begin.bitOffset()); + return begin; + } + } + + return end; +} + +} // namespace folly + +#endif /* FOLLY_BITS_H_ */ + diff --git a/folly/ConcurrentSkipList-inl.h b/folly/ConcurrentSkipList-inl.h new file mode 100644 index 00000000..78be7243 --- /dev/null +++ b/folly/ConcurrentSkipList-inl.h @@ -0,0 +1,220 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// @author: Xin Liu + +#ifndef FOLLY_CONCURRENTSKIPLIST_INL_H_ +#define FOLLY_CONCURRENTSKIPLIST_INL_H_ + +#include +#include +#include +#include + +#include +#include "folly/SmallLocks.h" +#include "folly/ThreadLocal.h" + +namespace folly { namespace detail { + +template class csl_iterator; + +template +class SkipListNode : boost::noncopyable { + enum { + IS_HEAD_NODE = 1, + MARKED_FOR_REMOVAL = (1 << 1), + FULLY_LINKED = (1 << 2), + }; + public: + typedef T value_type; + + static SkipListNode* create(int height, + const value_type& data, bool isHead = false) { + DCHECK(height >= 1 && height < 64) << height; + + size_t size = sizeof(SkipListNode) + height * sizeof(SkipListNode*); + auto* node = static_cast(malloc(size)); + new (node) SkipListNode(height); + + node->spinLock_.init(); + node->setFlags(0); + + if (isHead) { + node->setIsHeadNode(); + } else { + new (&(node->data_)) value_type(data); + } + return node; + } + + static void destroy(SkipListNode* node) { + if (!node->isHeadNode()) { + node->data_.~value_type(); + } + node->~SkipListNode(); + free(node); + } + + // assuming lock acquired + SkipListNode* promoteFrom(const SkipListNode* node) { + DCHECK(node != nullptr && height_ > node->height_); + setFlags(node->getFlags()); + if (!isHeadNode()) { + new (&(data_)) value_type(node->data()); + } + for (int i = 0; i < node->height_; ++i) { + setSkip(i, node->skip(i)); + } + return this; + } + + inline SkipListNode* skip(int layer) const { + DCHECK_LT(layer, height_); + return skip_[layer].load(std::memory_order_consume); + } + + // next valid node as in the linked list + SkipListNode* next() { + SkipListNode* node; + for (node = skip(0); + (node != nullptr && node->markedForRemoval()); + node = node->skip(0)) {} + return node; + } + + void setSkip(uint8_t h, SkipListNode* next) { + DCHECK_LT(h, height_); + skip_[h].store(next, std::memory_order_release); + } + + value_type& data() { return data_; } + const value_type& data() const { return data_; } + int maxLayer() const { return height_ - 1; } + int height() const { return height_; } + + std::unique_lock acquireGuard() { + return std::unique_lock(spinLock_); + } + + bool fullyLinked() const { return getFlags() & FULLY_LINKED; } + bool markedForRemoval() const { return getFlags() & MARKED_FOR_REMOVAL; } + bool isHeadNode() const { return getFlags() & IS_HEAD_NODE; } + + void setIsHeadNode() { + setFlags(getFlags() | IS_HEAD_NODE); + } + void setFullyLinked() { + setFlags(getFlags() | FULLY_LINKED); + } + void setMarkedForRemoval() { + setFlags(getFlags() | MARKED_FOR_REMOVAL); + } + + private: + ~SkipListNode() { + for (uint8_t i = 0; i < height_; ++i) { + skip_[i].~atomic(); + } + } + explicit SkipListNode(uint8_t height) : height_(height) { + for (uint8_t i = 0; i < height_; ++i) { + new (&skip_[i]) std::atomic(nullptr); + } + } + + uint16_t getFlags() const { + return flags_.load(std::memory_order_consume); + } + void setFlags(uint16_t flags) { + flags_.store(flags, std::memory_order_release); + } + + // TODO(xliu): on x86_64, it's possible to squeeze these into + // skip_[0] to maybe save 8 bytes depending on the data alignments. + // NOTE: currently this is x86_64 only anyway, due to the + // MicroSpinLock. + std::atomic flags_; + const uint8_t height_; + MicroSpinLock spinLock_; + + value_type data_; + + std::atomic skip_[0]; +}; + +class SkipListRandomHeight { + enum { kMaxHeight = 64 }; + public: + // make it a singleton. 
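+  // (Heights are drawn from a geometric distribution with ratio 1/e;
+  // see initLookupTable() below.)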
+  static SkipListRandomHeight *instance() {
+    static SkipListRandomHeight instance_;
+    return &instance_;
+  }
+
+  int getHeight(int maxHeight) const {
+    DCHECK_LE(maxHeight, kMaxHeight) << "max height too big!";
+    double p = randomProb();
+    for (int i = 0; i < maxHeight; ++i) {
+      if (p < lookupTable_[i]) {
+        return i + 1;
+      }
+    }
+    return maxHeight;
+  }
+
+  size_t getSizeLimit(int height) const {
+    DCHECK_LT(height, kMaxHeight);
+    return sizeLimitTable_[height];
+  }
+
+ private:
+
+  SkipListRandomHeight() { initLookupTable(); }
+
+  void initLookupTable() {
+    // set skip prob = 1/E
+    static const double kProbInv = exp(1);
+    static const double kProb = 1.0 / kProbInv;
+    static const size_t kMaxSizeLimit = std::numeric_limits<size_t>::max();
+
+    double sizeLimit = 1;
+    double p = lookupTable_[0] = (1 - kProb);
+    sizeLimitTable_[0] = 1;
+    for (int i = 1; i < kMaxHeight - 1; ++i) {
+      p *= kProb;
+      sizeLimit *= kProbInv;
+      lookupTable_[i] = lookupTable_[i - 1] + p;
+      sizeLimitTable_[i] = sizeLimit > kMaxSizeLimit ?
+        kMaxSizeLimit :
+        static_cast<size_t>(sizeLimit);
+    }
+    lookupTable_[kMaxHeight - 1] = 1;
+    sizeLimitTable_[kMaxHeight - 1] = kMaxSizeLimit;
+  }
+
+  static double randomProb() {
+    static ThreadLocal rng_;
+    return (*rng_)();
+  }
+
+  double lookupTable_[kMaxHeight];
+  size_t sizeLimitTable_[kMaxHeight];
+};
+
+}}
+
+#endif  // FOLLY_CONCURRENTSKIPLIST_INL_H_
diff --git a/folly/ConcurrentSkipList.h b/folly/ConcurrentSkipList.h
new file mode 100644
index 00000000..c5223849
--- /dev/null
+++ b/folly/ConcurrentSkipList.h
@@ -0,0 +1,852 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// @author: Xin Liu
+//
+// A concurrent skip list (CSL) implementation.
+// Ref: http://www.cs.tau.ac.il/~shanir/nir-pubs-web/Papers/OPODIS2006-BA.pdf
+
+/*
+
+This implements a sorted associative container that supports only
+unique keys.  (Similar to std::set.)
+
+Features:
+
+  1. Small memory overhead: ~40% less memory overhead compared with
+     std::set (1.6 words per node versus 3).  It has a minimum of 4
+     words (7 words if nodes were deleted) of per-list overhead,
+     though.
+
+  2. Read accesses (count, find iterator, skipper) are lock-free and
+     mostly wait-free (the only wait a reader may need to do is when
+     the node it is visiting is in a pending stage, i.e. deleting,
+     adding and not fully linked).  Write accesses (remove, add) need
+     to acquire locks, but locks are local to the predecessor nodes
+     and/or successor nodes.
+
+  3. Good high contention performance, comparable single-thread
+     performance.  In the multithreaded case (12 workers), CSL tested
+     10x faster than a RWSpinLocked std::set for an average-sized
+     list (1K - 1M nodes).
+
+     Comparable read performance to std::set when single threaded,
+     especially when the list size is large, and scales better to
+     larger lists: when the size is small, CSL can be 20-50% slower on
+     find()/contains().  As the size gets large (> 1M elements),
+     find()/contains() can be 30% faster.
+
+     Iterating through a skiplist is similar to iterating through a
+     linked list, thus is much (2-6x) faster than on a std::set
+     (tree-based).  This is especially true for short lists due to
+     better cache locality.  Based on that, it's also faster to
+     intersect two skiplists.
+
+  4. Lazy removal with GC support.  The removed nodes get deleted when
+     the last Accessor to the skiplist is destroyed.
+
+Caveats:
+
+  1. Write operations are usually 30% slower than std::set in a single
+     threaded environment.
+
+  2. Need to have a head node for each list, which has a 4 word
+     overhead.
+
+  3. When the list is quite small (< 1000 elements), single threaded
+     benchmarks show CSL can be 10x slower than std::set.
+
+  4. The interface requires using an Accessor to access the skiplist.
+     (See below.)
+
+  5. Currently x64 only, due to use of MicroSpinLock.
+
+  6. Freed nodes will not be reclaimed as long as there are ongoing
+     uses of the list.
+
+Sample usage:
+
+     typedef ConcurrentSkipList<int> SkipListT;
+     shared_ptr<SkipListT> sl(SkipListT::createInstance(init_head_height));
+     {
+       // It's usually good practice to hold an accessor only during
+       // its necessary life cycle (but not in a tight loop as
+       // Accessor creation incurs ref-counting overhead).
+       //
+       // Holding it longer delays garbage-collecting the deleted
+       // nodes in the list.
+       SkipListT::Accessor accessor(sl);
+       accessor.insert(23);
+       accessor.erase(2);
+       for (auto &elem : accessor) {
+         // use elem to access data
+       }
+       ... ...
+     }
+
+  Another useful type is the Skipper accessor.  This is useful if you
+  want to skip to locations in the way std::lower_bound() works,
+  i.e. it can be used for going through the list by skipping to the
+  node no less than a specified key.  The Skipper keeps its location as
+  state, which makes it convenient for things like implementing
+  intersection of two sets efficiently, as it can start from the last
+  visited position.
+
+     {
+       SkipListT::Accessor accessor(sl);
+       SkipListT::Skipper skipper(accessor);
+       skipper.to(30);
+       if (skipper) {
+         CHECK_LE(30, *skipper);
+       }
+       ... ...
+       // GC may happen when the accessor gets destructed.
+     }
+*/
+
+#ifndef FOLLY_CONCURRENT_SKIP_LIST_H_
+#define FOLLY_CONCURRENT_SKIP_LIST_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "folly/ConcurrentSkipList-inl.h"
+#include "folly/Likely.h"
+#include "folly/SmallLocks.h"
+
+namespace folly {
+
+template <typename T,
+          typename Comp = std::less<T>,
+          int MAX_HEIGHT = 24>
+class ConcurrentSkipList {
+  // MAX_HEIGHT needs to be at least 2 to suppress compiler
+  // warnings/errors (Werror=uninitialized triggered due to preds_[1]
+  // being treated as a scalar in the compiler).
+  static_assert(MAX_HEIGHT >= 2 && MAX_HEIGHT < 64,
+      "MAX_HEIGHT can only be in the range of [2, 64)");
+  typedef detail::SkipListNode<T> NodeType;
+  typedef std::unique_lock<MicroSpinLock> ScopedLocker;
+  typedef ConcurrentSkipList<T, Comp, MAX_HEIGHT> SkipListType;
+
+ public:
+  typedef T value_type;
+  typedef T key_type;
+
+
+  typedef detail::csl_iterator<value_type, NodeType> iterator;
+  typedef detail::csl_iterator<const value_type, const NodeType>
+    const_iterator;
+
+  class Accessor;
+  class Skipper;
+
+  // convenient function to get an Accessor to a new instance.
+  static Accessor create(int height=1) {
+    return Accessor(createInstance(height));
+  }
+
+  // create a shared_ptr skiplist object with initial head height.
+ static boost::shared_ptr createInstance(int height=1) { + return boost::shared_ptr(new SkipListType(height)); + } + + //=================================================================== + // Below are implementation details. + // Please see ConcurrentSkipList::Accessor for stdlib-like APIs. + //=================================================================== + + ~ConcurrentSkipList() { + LOG_IF(FATAL, recycler_.refs() > 0) + << "number of accessors is not 0, " << recycler_.refs() << " instead!" + << " This shouldn't have happened!"; + while (NodeType* current = head_.load(std::memory_order_relaxed)) { + NodeType* tmp = current->skip(0); + NodeType::destroy(current); + head_.store(tmp, std::memory_order_relaxed); + } + } + + private: + + static bool greater(const value_type &data, const NodeType *node) { + return node && Comp()(node->data(), data); + } + + static bool less(const value_type &data, const NodeType *node) { + return (node == nullptr) || Comp()(data, node->data()); + } + + static int findInsertionPoint(NodeType *cur, int cur_layer, + const value_type &data, + NodeType *preds[], NodeType *succs[]) { + int foundLayer = -1; + NodeType *pred = cur; + NodeType *foundNode = nullptr; + for (int layer = cur_layer; layer >= 0; --layer) { + NodeType *node = pred->skip(layer); + while (greater(data, node)) { + pred = node; + node = node->skip(layer); + } + if (foundLayer == -1 && !less(data, node)) { // the two keys equal + foundLayer = layer; + foundNode = node; + } + preds[layer] = pred; + + // if found, succs[0..foundLayer] need to point to the cached foundNode, + // as foundNode might be deleted at the same time thus pred->skip() can + // return NULL or another node. + succs[layer] = foundNode ? foundNode : node; + } + return foundLayer; + } + + struct Recycler : private boost::noncopyable { + Recycler() : refs_(0), dirty_(false) { lock_.init(); } + + ~Recycler() { + if (nodes_) { + for (auto& node : *nodes_) { + NodeType::destroy(node); + } + } + } + + void add(NodeType* node) { + std::lock_guard g(lock_); + if (nodes_.get() == nullptr) { + nodes_.reset(new std::vector(1, node)); + } else { + nodes_->push_back(node); + } + DCHECK_GT(refs(), 0); + dirty_.store(true, std::memory_order_relaxed); + } + + int refs() const { + return refs_.load(std::memory_order_relaxed); + } + + int addRef() { + return refs_.fetch_add(1, std::memory_order_relaxed); + } + + int release() { + // We don't expect to clean the recycler immediately everytime it is OK + // to do so. Here, it is possible that multiple accessors all release at + // the same time but nobody would clean the recycler here. If this + // happens, the recycler will usually still get cleaned when + // such a race doesn't happen. The worst case is the recycler will + // eventually get deleted along with the skiplist. + if (LIKELY(!dirty_.load(std::memory_order_relaxed) || refs() > 1)) { + return refs_.fetch_add(-1, std::memory_order_relaxed); + } + + boost::scoped_ptr > newNodes; + { + std::lock_guard g(lock_); + if (nodes_.get() == nullptr || refs() > 1) { + return refs_.fetch_add(-1, std::memory_order_relaxed); + } + // once refs_ reaches 1 and there is no other accessor, it is safe to + // remove all the current nodes in the recycler, as we already acquired + // the lock here so no more new nodes can be added, even though new + // accessors may be added after that. 
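+      // Swap the accumulated nodes out while holding the lock; they are
+      // destroyed below, after the lock has been released.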
+  struct Recycler : private boost::noncopyable {
+    Recycler() : refs_(0), dirty_(false) { lock_.init(); }
+
+    ~Recycler() {
+      if (nodes_) {
+        for (auto& node : *nodes_) {
+          NodeType::destroy(node);
+        }
+      }
+    }
+
+    void add(NodeType* node) {
+      std::lock_guard<MicroSpinLock> g(lock_);
+      if (nodes_.get() == nullptr) {
+        nodes_.reset(new std::vector<NodeType*>(1, node));
+      } else {
+        nodes_->push_back(node);
+      }
+      DCHECK_GT(refs(), 0);
+      dirty_.store(true, std::memory_order_relaxed);
+    }
+
+    int refs() const {
+      return refs_.load(std::memory_order_relaxed);
+    }
+
+    int addRef() {
+      return refs_.fetch_add(1, std::memory_order_relaxed);
+    }
+
+    int release() {
+      // We don't expect to clean the recycler immediately every time it is
+      // OK to do so. Here, it is possible that multiple accessors all
+      // release at the same time but nobody ends up cleaning the recycler.
+      // If this happens, the recycler will usually still get cleaned on a
+      // later release when such a race doesn't happen. The worst case is
+      // that the recycler is eventually deleted along with the skiplist.
+      if (LIKELY(!dirty_.load(std::memory_order_relaxed) || refs() > 1)) {
+        return refs_.fetch_add(-1, std::memory_order_relaxed);
+      }
+
+      boost::scoped_ptr<std::vector<NodeType*> > newNodes;
+      {
+        std::lock_guard<MicroSpinLock> g(lock_);
+        if (nodes_.get() == nullptr || refs() > 1) {
+          return refs_.fetch_add(-1, std::memory_order_relaxed);
+        }
+        // Once refs_ reaches 1 and there is no other accessor, it is safe
+        // to remove all the current nodes in the recycler, as we already
+        // acquired the lock here so no more new nodes can be added, even
+        // though new accessors may be added after that.
+        newNodes.swap(nodes_);
+        dirty_.store(false, std::memory_order_relaxed);
+      }
+
+      // TODO(xliu): should we spawn a thread to do this when there is a
+      // large number of nodes in the recycler?
+      for (auto& node : *newNodes) {
+        NodeType::destroy(node);
+      }
+
+      // Decrease the ref count at the very end, to minimize the chance of
+      // other threads acquiring lock_ to clear the deleted nodes again.
+      return refs_.fetch_add(-1, std::memory_order_relaxed);
+    }
+
+   private:
+    boost::scoped_ptr<std::vector<NodeType*> > nodes_;
+    std::atomic<int32_t> refs_;  // current number of visitors to the list
+    std::atomic<bool> dirty_;    // whether *nodes_ is non-empty
+    MicroSpinLock lock_;         // protects access to *nodes_
+  };  // class ConcurrentSkipList::Recycler
+
+  explicit ConcurrentSkipList(int height) :
+    head_(NodeType::create(height, value_type(), true)), size_(0) {}
+
+  size_t size() const { return size_.load(std::memory_order_relaxed); }
+  int height() const {
+    return head_.load(std::memory_order_consume)->height();
+  }
+  int maxLayer() const { return height() - 1; }
+
+  size_t incrementSize(int delta) {
+    return size_.fetch_add(delta, std::memory_order_relaxed) + delta;
+  }
+
+  // Returns the node if found, nullptr otherwise.
+  NodeType* find(const value_type &data) {
+    auto ret = findNode(data);
+    if (ret.second && !ret.first->markedForRemoval()) return ret.first;
+    return nullptr;
+  }
+
+  // Locks all the necessary nodes for changing (adding to or removing from)
+  // the list. Returns true if all the locks were acquired successfully and
+  // the related nodes are all valid (not in certain pending states), false
+  // otherwise.
+  bool lockNodesForChange(int nodeHeight,
+      ScopedLocker guards[MAX_HEIGHT],
+      NodeType *preds[MAX_HEIGHT],
+      NodeType *succs[MAX_HEIGHT],
+      bool adding=true) {
+    NodeType *pred, *succ, *prevPred = nullptr;
+    bool valid = true;
+    for (int layer = 0; valid && layer < nodeHeight; ++layer) {
+      pred = preds[layer];
+      DCHECK(pred != nullptr) << "layer=" << layer << " height=" << height()
+        << " nodeheight=" << nodeHeight;
+      succ = succs[layer];
+      if (pred != prevPred) {
+        guards[layer] = pred->acquireGuard();
+        prevPred = pred;
+      }
+      valid = !pred->markedForRemoval() &&
+        pred->skip(layer) == succ;  // check again after locking
+
+      if (adding) {  // when adding a node, the succ shouldn't be going away
+        valid = valid && (succ == nullptr || !succ->markedForRemoval());
+      }
+    }
+
+    return valid;
+  }
+
+  // Returns a pair of values:
+  //   pair.first always stores the pointer to the node with the given key.
+  //     It is either the newly added node, or the existing node in the
+  //     list with the same key.
+  //   pair.second stores whether the data was added successfully:
+  //     0 means not added, otherwise it is the new size of the list.
+  std::pair<NodeType*, size_t> addOrGetData(const value_type &data) {
+    NodeType *preds[MAX_HEIGHT], *succs[MAX_HEIGHT];
+    NodeType *newNode;
+    size_t newSize;
+    while (true) {
+      int max_layer = 0;
+      int layer = findInsertionPointGetMaxLayer(data, preds, succs,
+          &max_layer);
+
+      if (layer >= 0) {
+        NodeType *nodeFound = succs[layer];
+        DCHECK(nodeFound != nullptr);
+        if (nodeFound->markedForRemoval()) {
+          continue;  // if it's getting deleted, retry finding the node
+        }
+        // wait until fully linked.
+        while (UNLIKELY(!nodeFound->fullyLinked())) {}
+        return std::make_pair(nodeFound, 0);
+      }
+
+      // The new node's height needs to be capped at the original height --
+      // the real height may have grown since then.
+      int nodeHeight = detail::SkipListRandomHeight::instance()->
+        getHeight(max_layer + 1);
+
+      ScopedLocker guards[MAX_HEIGHT];
+      if (!lockNodesForChange(nodeHeight, guards, preds, succs)) {
+        continue;  // give up the locks and retry until all valid
+      }
+
+      // Locks acquired and all valid; modify the links under the locks.
+      newNode = NodeType::create(nodeHeight, data);
+      for (int layer = 0; layer < nodeHeight; ++layer) {
+        newNode->setSkip(layer, succs[layer]);
+        preds[layer]->setSkip(layer, newNode);
+      }
+
+      newNode->setFullyLinked();
+      newSize = incrementSize(1);
+      break;
+    }
+
+    int hgt = height();
+    size_t sizeLimit =
+      detail::SkipListRandomHeight::instance()->getSizeLimit(hgt);
+
+    if (hgt < MAX_HEIGHT && newSize > sizeLimit) {
+      growHeight(hgt + 1);
+    }
+    CHECK_GT(newSize, 0);
+    return std::make_pair(newNode, newSize);
+  }
+
+  bool remove(const value_type &data) {
+    NodeType *nodeToDelete = nullptr;
+    ScopedLocker nodeGuard;
+    bool isMarked = false;
+    int nodeHeight = 0;
+    NodeType* preds[MAX_HEIGHT], *succs[MAX_HEIGHT];
+
+    while (true) {
+      int max_layer = 0;
+      int layer = findInsertionPointGetMaxLayer(data, preds, succs,
+          &max_layer);
+      if (!isMarked && (layer < 0 || !okToDelete(succs[layer], layer))) {
+        return false;
+      }
+
+      if (!isMarked) {
+        nodeToDelete = succs[layer];
+        nodeHeight = nodeToDelete->height();
+        nodeGuard = nodeToDelete->acquireGuard();
+        if (nodeToDelete->markedForRemoval()) return false;
+        nodeToDelete->setMarkedForRemoval();
+        isMarked = true;
+      }
+
+      // acquire pred locks from bottom layer up
+      ScopedLocker guards[MAX_HEIGHT];
+      if (!lockNodesForChange(nodeHeight, guards, preds, succs, false)) {
+        continue;  // this will unlock all the locks
+      }
+
+      for (int layer = nodeHeight - 1; layer >= 0; --layer) {
+        preds[layer]->setSkip(layer, nodeToDelete->skip(layer));
+      }
+
+      incrementSize(-1);
+      break;
+    }
+    recycle(nodeToDelete);
+    return true;
+  }
+
+  const value_type *first() const {
+    auto node = head_.load(std::memory_order_consume)->skip(0);
+    return node ? &node->data() : nullptr;
+  }
+
+  const value_type *last() const {
+    NodeType *pred = head_.load(std::memory_order_consume);
+    NodeType *node = nullptr;
+    for (int layer = maxLayer(); layer >= 0; --layer) {
+      do {
+        node = pred->skip(layer);
+        if (node) pred = node;
+      } while (node != nullptr);
+    }
+    return pred == head_.load(std::memory_order_relaxed)
+      ? nullptr : &pred->data();
+  }
+
+  static bool okToDelete(NodeType *candidate, int layer) {
+    DCHECK(candidate != nullptr);
+    return candidate->fullyLinked() &&
+      candidate->maxLayer() == layer &&
+      !candidate->markedForRemoval();
+  }
+
+  // Find node for insertion/deletion.
+  int findInsertionPointGetMaxLayer(const value_type &data,
+      NodeType *preds[], NodeType *succs[], int *max_layer) const {
+    *max_layer = maxLayer();
+    return findInsertionPoint(head_.load(std::memory_order_consume),
+      *max_layer, data, preds, succs);
+  }
+
+  // Find node for access. Returns a pair of values:
+  //   pair.first = the first node that is no less than the data value;
+  //   pair.second = 1 when the data value is found, 0 otherwise.
+  // This is like lower_bound, but not exact: the node may be marked for
+  // removal, so callers still need to check for that.
+  std::pair<NodeType*, int> findNode(const value_type &data) const {
+    return findNodeDownRight(data);
+  }
+  // Find node by first stepping down, then stepping right. Based on
+  // benchmark results, this is slightly faster than findNodeRightDown
+  // due to better locality of the skip pointers.
+  std::pair<NodeType*, int> findNodeDownRight(const value_type &data) const {
+    NodeType *pred = head_.load(std::memory_order_consume);
+    int ht = pred->height();
+    NodeType *node = nullptr;
+
+    bool found = false;
+    while (!found) {
+      // stepping down
+      for (; ht > 0 && less(data, pred->skip(ht - 1)); --ht) {}
+      if (ht == 0) return std::make_pair(pred->skip(0), 0);  // not found
+
+      node = pred->skip(--ht);  // node <= data now
+      // stepping right
+      while (greater(data, node)) {
+        pred = node;
+        node = node->skip(ht);
+      }
+      found = !less(data, node);
+    }
+    return std::make_pair(node, found);
+  }
+
+  // Find node by first stepping right, then stepping down.
+  // We still keep this for reference purposes.
+  std::pair<NodeType*, int> findNodeRightDown(const value_type &data) const {
+    NodeType *pred = head_.load(std::memory_order_consume);
+    NodeType *node = nullptr;
+    auto top = maxLayer();
+    int found = 0;
+    for (int layer = top; !found && layer >= 0; --layer) {
+      node = pred->skip(layer);
+      while (greater(data, node)) {
+        pred = node;
+        node = node->skip(layer);
+      }
+      found = !less(data, node);
+    }
+    return std::make_pair(node, found);
+  }
+
+  NodeType* lower_bound(const value_type &data) const {
+    auto node = findNode(data).first;
+    while (node != nullptr && node->markedForRemoval()) {
+      node = node->skip(0);
+    }
+    return node;
+  }
+
+  void growHeight(int height) {
+    NodeType* oldHead = head_.load(std::memory_order_consume);
+    if (oldHead->height() >= height) {  // someone else already did this
+      return;
+    }
+
+    NodeType* newHead = NodeType::create(height, value_type(), true);
+
+    {  // need to guard the head node in case others are adding/removing
+       // nodes linked to the head.
+      ScopedLocker g = oldHead->acquireGuard();
+      newHead->promoteFrom(oldHead);
+      NodeType* expected = oldHead;
+      if (!head_.compare_exchange_strong(expected, newHead,
+          std::memory_order_release)) {
+        // if someone has already done the swap, just return.
+        NodeType::destroy(newHead);
+        return;
+      }
+      oldHead->setMarkedForRemoval();
+    }
+    recycle(oldHead);
+  }
+
+  void recycle(NodeType *node) {
+    recycler_.add(node);
+  }
+
+  std::atomic<NodeType*> head_;
+  Recycler recycler_;
+  std::atomic<size_t> size_;
+};
+
+template<typename T, typename Comp, int MAX_HEIGHT>
+class ConcurrentSkipList<T, Comp, MAX_HEIGHT>::Accessor {
+  typedef detail::SkipListNode<T> NodeType;
+  typedef ConcurrentSkipList<T, Comp, MAX_HEIGHT> SkipListType;
+ public:
+  typedef T value_type;
+  typedef T key_type;
+  typedef T& reference;
+  typedef T* pointer;
+  typedef const T& const_reference;
+  typedef const T* const_pointer;
+  typedef size_t size_type;
+  typedef Comp key_compare;
+  typedef Comp value_compare;
+
+  typedef typename SkipListType::iterator iterator;
+  typedef typename SkipListType::const_iterator const_iterator;
+  typedef typename SkipListType::Skipper Skipper;
+
+  explicit Accessor(boost::shared_ptr<ConcurrentSkipList> skip_list)
+    : slHolder_(std::move(skip_list))
+  {
+    sl_ = slHolder_.get();
+    DCHECK(sl_ != nullptr);
+    sl_->recycler_.addRef();
+  }
+
+  // Unsafe initializer: the caller assumes the responsibility of keeping
+  // skip_list valid during the whole life cycle of the Accessor.
+  explicit Accessor(ConcurrentSkipList *skip_list) : sl_(skip_list) {
+    DCHECK(sl_ != nullptr);
+    sl_->recycler_.addRef();
+  }
+
+  Accessor(const Accessor &accessor) :
+      sl_(accessor.sl_),
+      slHolder_(accessor.slHolder_) {
+    sl_->recycler_.addRef();
+  }
+
+  Accessor& operator=(const Accessor &accessor) {
+    if (this != &accessor) {
+      slHolder_ = accessor.slHolder_;
+      sl_->recycler_.release();
+      sl_ = accessor.sl_;
+      sl_->recycler_.addRef();
+    }
+    return *this;
+  }
+
+  ~Accessor() {
+    sl_->recycler_.release();
+  }
+
+  bool empty() const { return sl_->size() == 0; }
+  size_t size() const { return sl_->size(); }
+  size_type max_size() const { return std::numeric_limits<size_type>::max(); }
+
+  // Returns end() if the value is not in the list; otherwise returns an
+  // iterator pointing to the data. It is guaranteed that the data is valid
+  // for as long as the Accessor is held.
+  iterator find(const key_type &value) { return iterator(sl_->find(value)); }
+  const_iterator find(const key_type &value) const {
+    return iterator(sl_->find(value));
+  }
+  size_type count(const key_type &data) const { return contains(data); }
+
+  iterator begin() const {
+    NodeType* head = sl_->head_.load(std::memory_order_consume);
+    return iterator(head->next());
+  }
+  iterator end() const { return iterator(nullptr); }
+  const_iterator cbegin() const { return begin(); }
+  const_iterator cend() const { return end(); }
+
+  std::pair<iterator, bool> insert(const key_type &data) {
+    auto ret = sl_->addOrGetData(data);
+    return std::make_pair(iterator(ret.first), ret.second);
+  }
+  size_t erase(const key_type &data) { return remove(data); }
+
+  iterator lower_bound(const key_type &data) const {
+    return iterator(sl_->lower_bound(data));
+  }
+
+  size_t height() const { return sl_->height(); }
+
+  // first() returns a pointer to the first element in the skiplist, or
+  // nullptr if the list is empty.
+  //
+  // last() returns a pointer to the last element in the skiplist, or
+  // nullptr if the list is empty.
+  //
+  // Note: As concurrent writing can happen, first() is not
+  // guaranteed to be the min_element() in the list. Similarly,
+  // last() is not guaranteed to be the max_element(), and both of them can
+  // be invalid (i.e. nullptr), so we name them differently from front() and
+  // tail() here.
+  const key_type *first() const { return sl_->first(); }
+  const key_type *last() const { return sl_->last(); }
+
+  // Try to remove the last element in the skip list.
+  //
+  // Returns true if we removed it, false if either the list is empty
+  // or a race condition happened (i.e. the used-to-be last element
+  // was already removed by another thread).
+  bool pop_back() {
+    auto last = sl_->last();
+    return last ? sl_->remove(*last) : false;
+  }
+
+  std::pair<key_type*, bool> addOrGetData(const key_type &data) {
+    auto ret = sl_->addOrGetData(data);
+    return std::make_pair(&ret.first->data(), ret.second);
+  }
+
+  SkipListType* skiplist() const { return sl_; }
+
+  // Legacy interfaces.
+  // TODO:(xliu) remove these.
+  bool contains(const key_type &data) const { return sl_->find(data); }
+  // Returns true if the node was added successfully, false if not, i.e. a
+  // node with the same key already existed in the list.
+  bool add(const key_type &data) { return sl_->addOrGetData(data).second; }
+  bool remove(const key_type &data) { return sl_->remove(data); }
+
+ private:
+  SkipListType *sl_;
+  boost::shared_ptr<SkipListType> slHolder_;
+};
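+// Example (illustrative only, assuming a skiplist of ints): stdlib-style
+// use of the Accessor API above.
+//
+//   auto sl = ConcurrentSkipList<int>::createInstance();
+//   ConcurrentSkipList<int>::Accessor acc(sl);
+//   acc.insert(3);
+//   if (acc.find(3) != acc.end()) {
+//     acc.erase(3);
+//   }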
+// Implements the forward iterator concept.
+template<typename ValT, typename NodeT>
+class detail::csl_iterator :
+  public boost::iterator_facade<csl_iterator<ValT, NodeT>,
+                                ValT, boost::forward_traversal_tag> {
+ public:
+  typedef ValT value_type;
+  typedef value_type& reference;
+  typedef value_type* pointer;
+  typedef ptrdiff_t difference_type;
+
+  explicit csl_iterator(NodeT* node = nullptr) : node_(node) {}
+
+  template<typename OtherVal, typename OtherNode>
+  csl_iterator(const csl_iterator<OtherVal, OtherNode> &other,
+      typename std::enable_if<std::is_convertible<OtherVal,
+        ValT>::value>::type* = 0) : node_(other.node_) {}
+
+  size_t nodeSize() const {
+    return node_ == nullptr ? 0 :
+      node_->height() * sizeof(NodeT*) + sizeof(*this);
+  }
+
+  bool good() const { return node_ != nullptr; }
+
+ private:
+  friend class boost::iterator_core_access;
+  template<class, class> friend class csl_iterator;
+
+  void increment() { node_ = node_->next(); }
+  bool equal(const csl_iterator& other) const { return node_ == other.node_; }
+  value_type& dereference() const { return node_->data(); }
+
+  NodeT* node_;
+};
+
+// Skipper interface.
+template<typename T, typename Comp, int MAX_HEIGHT>
+class ConcurrentSkipList<T, Comp, MAX_HEIGHT>::Skipper {
+  typedef detail::SkipListNode<T> NodeType;
+  typedef ConcurrentSkipList<T, Comp, MAX_HEIGHT> SkipListType;
+  typedef typename SkipListType::Accessor Accessor;
+
+ public:
+  typedef T value_type;
+  typedef T& reference;
+  typedef T* pointer;
+  typedef ptrdiff_t difference_type;
+
+  Skipper(const boost::shared_ptr<SkipListType>& skipList) :
+      accessor_(skipList) {
+    init();
+  }
+
+  Skipper(const Accessor& accessor) : accessor_(accessor) {
+    init();
+  }
+
+  void init() {
+    // need to cache the head node
+    NodeType* head_node = head();
+    headHeight_ = head_node->height();
+    for (int i = 0; i < headHeight_; ++i) {
+      preds_[i] = head_node;
+      succs_[i] = head_node->skip(i);
+    }
+    int max_layer = maxLayer();
+    for (int i = 0; i < max_layer; ++i) {
+      hints_[i] = i + 1;
+    }
+    hints_[max_layer] = max_layer;
+  }
+
+  // Advance to the next node in the list.
+  Skipper& operator ++() {
+    preds_[0] = succs_[0];
+    succs_[0] = preds_[0]->skip(0);
+    int height = curHeight();
+    for (int i = 1; i < height && preds_[0] == succs_[i]; ++i) {
+      preds_[i] = succs_[i];
+      succs_[i] = preds_[i]->skip(i);
+    }
+    return *this;
+  }
+
+  bool good() const { return succs_[0] != nullptr; }
+
+  int maxLayer() const { return headHeight_ - 1; }
+
+  int curHeight() const {
+    // Need to cap the height to the cached head height, as the current
+    // node might be a newly inserted node, and the head height may have
+    // grown in the meantime.
+    return succs_[0] ? std::min(headHeight_, succs_[0]->height()) : 0;
+  }
+
+  const value_type &data() const {
+    DCHECK(succs_[0] != NULL);
+    return succs_[0]->data();
+  }
+
+  value_type &operator *() const {
+    DCHECK(succs_[0] != NULL);
+    return succs_[0]->data();
+  }
+
+  value_type *operator->() {
+    DCHECK(succs_[0] != NULL);
+    return &succs_[0]->data();
+  }
+  /*
+   * Skip to the position whose data is no less than the parameter
+   * (i.e. the lower_bound).
+   *
+   * Returns true if the data is found, false otherwise.
+   */
+  bool to(const value_type &data) {
+    int layer = curHeight() - 1;
+    if (layer < 0) return false;  // we have reached the end of the list
+
+    int lyr = hints_[layer];
+    int max_layer = maxLayer();
+    while (SkipListType::greater(data, succs_[lyr]) && lyr < max_layer) {
+      ++lyr;
+    }
+    hints_[layer] = lyr;  // update the hint
+
+    int foundLayer = SkipListType::
+      findInsertionPoint(preds_[lyr], lyr, data, preds_, succs_);
+    if (foundLayer < 0) return false;
+
+    DCHECK(succs_[0] != NULL) << "lyr=" << lyr << "; max_layer=" << max_layer;
+    return !succs_[0]->markedForRemoval();
+  }
+
+ private:
+  NodeType* head() const {
+    return accessor_.skiplist()->head_.load(std::memory_order_consume);
+  }
+
+  Accessor accessor_;
+  int headHeight_;
+  NodeType *succs_[MAX_HEIGHT], *preds_[MAX_HEIGHT];
+  uint8_t hints_[MAX_HEIGHT];
+};
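+// Example (illustrative only): leap-frog intersection of two skiplists of
+// ints using Skippers, per the header comment; the helper is hypothetical.
+//
+//   typedef ConcurrentSkipList<int> SkipListT;
+//   std::vector<int> intersect(SkipListT::Accessor& a,
+//                              SkipListT::Accessor& b) {
+//     std::vector<int> out;
+//     SkipListT::Skipper sa(a), sb(b);
+//     while (sa.good() && sb.good()) {
+//       if (*sa == *sb) {
+//         out.push_back(*sa);
+//         ++sa;
+//         ++sb;
+//       } else if (*sa < *sb) {
+//         sa.to(*sb);  // skip forward to the first element >= *sb
+//       } else {
+//         sb.to(*sa);
+//       }
+//     }
+//     return out;
+//   }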
+
+} // namespace folly
+
+#endif  // FOLLY_CONCURRENT_SKIP_LIST_H_
diff --git a/folly/Conv.cpp b/folly/Conv.cpp
new file mode 100644
index 00000000..c65ba04a
--- /dev/null
+++ b/folly/Conv.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define FOLLY_CONV_INTERNAL
+#include "folly/Conv.h"
+
+namespace folly {
+namespace detail {
+
+extern const char digit1[101] =
+  "00000000001111111111222222222233333333334444444444"
+  "55555555556666666666777777777788888888889999999999";
+extern const char digit2[101] =
+  "01234567890123456789012345678901234567890123456789"
+  "01234567890123456789012345678901234567890123456789";
+
+template <> const char *const MaxString<bool>::value = "true";
+template <> const char *const MaxString<uint8_t>::value = "255";
+template <> const char *const MaxString<uint16_t>::value = "65535";
+template <> const char *const MaxString<uint32_t>::value = "4294967295";
+#if __SIZEOF_LONG__ == 4
+template <> const char *const MaxString<unsigned long>::value =
+  "4294967295";
+#else
+template <> const char *const MaxString<unsigned long>::value =
+  "18446744073709551615";
+#endif
+static_assert(sizeof(unsigned long) >= 4,
+              "Wrong value for MaxString<unsigned long>::value,"
+              " please update.");
+template <> const char *const MaxString<unsigned long long>::value =
+  "18446744073709551615";
+static_assert(sizeof(unsigned long long) >= 8,
+              "Wrong value for MaxString<unsigned long long>::value"
+              ", please update.");
+
+inline bool bool_str_cmp(const char** b, size_t len, const char* value) {
+  // Can't use strncasecmp, since we want to ensure that the full value
+  // matches.
+  const char* p = *b;
+  const char* e = *b + len;
+  const char* v = value;
+  while (*v != '\0') {
+    if (p == e || tolower(*p) != *v) {  // value is already lowercase
+      return false;
+    }
+    ++p;
+    ++v;
+  }
+
+  *b = p;
+  return true;
+}
+
+bool str_to_bool(StringPiece* src) {
+  auto b = src->begin(), e = src->end();
+  for (;; ++b) {
+    FOLLY_RANGE_CHECK(b < e,
+                      "No non-whitespace characters found in input string");
+    if (!isspace(*b)) break;
+  }
+
+  bool result;
+  size_t len = e - b;
+  switch (*b) {
+    case '0':
+    case '1': {
+      // Attempt to parse the value as an integer
+      StringPiece tmp(*src);
+      uint8_t value = to<uint8_t>(&tmp);
+      // Only accept 0 or 1
+      FOLLY_RANGE_CHECK(value <= 1,
+                        "Integer overflow when parsing bool: must be 0 or 1");
+      b = tmp.begin();
+      result = (value == 1);
+      break;
+    }
+    case 'y':
+    case 'Y':
+      result = true;
+      if (!bool_str_cmp(&b, len, "yes")) {
+        ++b;  // accept the single 'y' character
+      }
+      break;
+    case 'n':
+    case 'N':
+      result = false;
+      if (!bool_str_cmp(&b, len, "no")) {
+        ++b;
+      }
+      break;
+    case 't':
+    case 'T':
+      result = true;
+      if (!bool_str_cmp(&b, len, "true")) {
+        ++b;
+      }
+      break;
+    case 'f':
+    case 'F':
+      result = false;
+      if (!bool_str_cmp(&b, len, "false")) {
+        ++b;
+      }
+      break;
+    case 'o':
+    case 'O':
+      if (bool_str_cmp(&b, len, "on")) {
+        result = true;
+      } else if (bool_str_cmp(&b, len, "off")) {
+        result = false;
+      } else {
+        FOLLY_RANGE_CHECK(false, "Invalid value for bool");
+      }
+      break;
+    default:
+      FOLLY_RANGE_CHECK(false, "Invalid value for bool");
+  }
+
+  src->assign(b, e);
+  return result;
+}
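+// Examples (illustrative only) of inputs accepted by str_to_bool via
+// folly::to<bool>:
+//
+//   to<bool>("true");   // true
+//   to<bool>(" no ");   // false; surrounding whitespace is allowed
+//   to<bool>("1");      // true; digit strings must parse to 0 or 1
+//   to<bool>("2");      // throws std::range_error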
+
+} // namespace detail
+} // namespace folly
diff --git a/folly/Conv.h b/folly/Conv.h
new file mode 100644
index 00000000..501e5877
--- /dev/null
+++ b/folly/Conv.h
@@ -0,0 +1,844 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Converts anything to anything, with an emphasis on performance and
+ * safety.
+ *
+ * @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
+ */
+
+#ifndef FOLLY_BASE_CONV_H_
+#define FOLLY_BASE_CONV_H_
+
+#include "folly/FBString.h"
+#include "folly/Likely.h"
+#include "folly/Preprocessor.h"
+#include "folly/Range.h"
+
+#include <boost/implicit_cast.hpp>
+#include <type_traits>
+#include <limits>
+#include <string>
+#include <tuple>
+#include <stdexcept>
+#include <typeinfo>
+
+#include "double-conversion.h"   // V8 JavaScript implementation
+
+#define FOLLY_RANGE_CHECK(condition, message)               \
+  ((condition) ? (void)0 : throw std::range_error(          \
+    (__FILE__ "(" + std::to_string((long long int) __LINE__) + "): " \
+     + (message)).c_str()))
+
+namespace folly {
+
+/*******************************************************************************
+ * Integral to integral
+ ******************************************************************************/
+
+/**
+ * Checked conversion from integral to integral. The checks are only
+ * performed when meaningful, e.g. conversion from int to long goes
+ * unchecked.
+ */
+template <class Tgt, class Src>
+typename std::enable_if<
+  std::is_integral<Src>::value && std::is_integral<Tgt>::value,
+  Tgt>::type
+to(const Src & value) {
+  /* static */ if (std::numeric_limits<Tgt>::max()
+                   < std::numeric_limits<Src>::max()) {
+    FOLLY_RANGE_CHECK(value <= std::numeric_limits<Tgt>::max(),
+                      "Overflow");
+  }
+  /* static */ if (std::is_signed<Src>::value &&
+                   (!std::is_signed<Tgt>::value || sizeof(Src) > sizeof(Tgt))) {
+    FOLLY_RANGE_CHECK(value >= std::numeric_limits<Tgt>::min(),
+                      "Negative overflow");
+  }
+  return static_cast<Tgt>(value);
+}
+
+/*******************************************************************************
+ * Floating point to floating point
+ ******************************************************************************/
+
+template <class Tgt, class Src>
+typename std::enable_if<
+  std::is_floating_point<Tgt>::value && std::is_floating_point<Src>::value,
+  Tgt>::type
+to(const Src & value) {
+  /* static */ if (std::numeric_limits<Tgt>::max() <
+                   std::numeric_limits<Src>::max()) {
+    FOLLY_RANGE_CHECK(value <= std::numeric_limits<Tgt>::max(),
+                      "Overflow");
+    FOLLY_RANGE_CHECK(value >= -std::numeric_limits<Tgt>::max(),
+                      "Negative overflow");
+  }
+  return boost::implicit_cast<Tgt>(value);
+}
+
+/*******************************************************************************
+ * Anything to string
+ ******************************************************************************/
+
+namespace detail {
+
+template <class T> struct IsSomeString {
+  enum { value = std::is_same<T, std::string>::value
+         || std::is_same<T, fbstring>::value };
+};
+
+template <class T>
+const T& getLastElement(const T & v) {
+  return v;
+}
+
+template <class T, class... Ts>
+typename std::tuple_element<
+  sizeof...(Ts),
+  std::tuple<T, Ts...> >::type const&
+  getLastElement(const T& v, const Ts&... vs) {
+  return getLastElement(vs...);
+}
+
+/*******************************************************************************
+ * Conversions from integral types to string types.
+ ******************************************************************************/
+
+// Returns the offset of the formatted string from the start of
+// the supplied buffer. The new string will be at range
+// [buf+begin, buf+bufLen). Uint will be either uint32_t or uint64_t.
+template <class Uint>
+size_t uintToBuffer(char*const buffer, size_t bufLen, Uint v) {
+  extern const char digit1[101], digit2[101];
+  for (;;) {
+    if (v < 100) {
+      if (v < 10) {
+        buffer[--bufLen] = static_cast<char>(v + '0');
+      } else {
+        size_t r = static_cast<size_t>(v);
+        bufLen -= 2;
+        buffer[bufLen] = digit1[r];
+        buffer[bufLen + 1] = digit2[r];
+      }
+      break;
+    }
+    Uint t = v;
+    v /= 100;
+    size_t r = static_cast<size_t>(t - v * 100);
+    bufLen -= 2;
+    buffer[bufLen] = digit1[r];
+    buffer[bufLen + 1] = digit2[r];
+  }
+  return bufLen;
+}
+
+const size_t kMaxInt64BufLen = 21;  // 19 + 1 for possible '-' sign + 1 for \0
+
+} // namespace detail
+
+/**
+ * A single char gets appended.
+ */
+template <class Tgt>
+void toAppend(char value, Tgt * result) {
+  *result += value;
+}
+
+/**
+ * Everything implicitly convertible to const char* gets appended.
+ */
+template <class Src, class Tgt>
+typename std::enable_if<
+  std::is_convertible<Src, const char*>::value
+  && detail::IsSomeString<Tgt>::value>::type
+toAppend(Src value, Tgt * result) {
+  // Treat null pointers like an empty string, as in:
+  // operator<<(std::ostream&, const char*).
+  const char* c = value;
+  if (c) {
+    result->append(value);
+  }
+}
+
+/**
+ * Strings get appended, too.
+ */ +template +typename std::enable_if< + detail::IsSomeString::value && detail::IsSomeString::value>::type +toAppend(const Src& value, Tgt * result) { + result->append(value); +} + +/** + * and StringPiece objects too + */ +template +typename std::enable_if< + detail::IsSomeString::value>::type +toAppend(StringPiece value, Tgt * result) { + result->append(value.data(), value.size()); +} + +/** + * There's no implicit conversion from fbstring to other string types, + * so make a specialization. + */ +template +typename std::enable_if< + detail::IsSomeString::value>::type +toAppend(const fbstring& value, Tgt * result) { + result->append(value.data(), value.size()); +} + +/** + * int32_t and int64_t to string (by appending) go through here. The + * result is APPENDED to a preexisting string passed as the second + * parameter. For convenience, the function also returns a reference + * to *result. This should be efficient with fbstring because fbstring + * incurs no dynamic allocation below 23 bytes and no number has more + * than 22 bytes in its textual representation (20 for digits, one for + * sign, one for the terminating 0). + */ +template +typename std::enable_if< + std::is_integral::value && std::is_signed::value + && detail::IsSomeString::value && sizeof(Src) >= 4>::type +toAppend(Src value, Tgt * result) { + typedef typename std::make_unsigned::type Usrc; + char buffer[detail::kMaxInt64BufLen]; + size_t begin; + if (value < 0) { + begin = detail::uintToBuffer(buffer, sizeof(buffer), + static_cast(-value)); + DCHECK_GE(begin, 1); + buffer[--begin] = '-'; + } else { + begin = detail::uintToBuffer(buffer, sizeof(buffer), + static_cast(value)); + } + result->append(buffer + begin, buffer + sizeof(buffer)); +} + +/** + * As above, but for uint32_t and uint64_t. + */ +template +typename std::enable_if< + std::is_integral::value && !std::is_signed::value + && detail::IsSomeString::value && sizeof(Src) >= 4>::type +toAppend(Src value, Tgt * result) { + char buffer[detail::kMaxInt64BufLen]; + const size_t begin = detail::uintToBuffer(buffer, sizeof(buffer), value); + result->append(buffer + begin, buffer + sizeof(buffer)); +} + +/** + * All small signed and unsigned integers to string go through 32-bit + * types int32_t and uint32_t, respectively. + */ +template +typename std::enable_if< + std::is_integral::value + && detail::IsSomeString::value && sizeof(Src) < 4>::type +toAppend(Src value, Tgt * result) { + typedef typename + std::conditional::value, int64_t, uint64_t>::type + Intermediate; + toAppend(static_cast(value), result); +} + +/** + * Enumerated values get appended as integers. + */ +template +typename std::enable_if< + std::is_enum::value && detail::IsSomeString::value>::type +toAppend(Src value, Tgt * result) { + /* static */ if (Src(-1) < 0) { + /* static */ if (sizeof(Src) <= sizeof(int)) { + toAppend(static_cast(value), result); + } else { + toAppend(static_cast(value), result); + } + } else { + /* static */ if (sizeof(Src) <= sizeof(int)) { + toAppend(static_cast(value), result); + } else { + toAppend(static_cast(value), result); + } + } +} + +/******************************************************************************* + * Conversions from floating-point types to string types. 
+ ******************************************************************************/ + +/** Wrapper around DoubleToStringConverter **/ +template +typename std::enable_if< + std::is_floating_point::value + && detail::IsSomeString::value>::type +toAppend( + Src value, + Tgt * result, + double_conversion::DoubleToStringConverter::DtoaMode mode, + unsigned int numDigits) { + using namespace double_conversion; + DoubleToStringConverter + conv(DoubleToStringConverter::NO_FLAGS, + "infinity", "NaN", 'E', + -6, // decimal in shortest low + 21, // decimal in shortest high + 6, // max leading padding zeros + 1); // max trailing padding zeros + char buffer[256]; + StringBuilder builder(buffer, sizeof(buffer)); + switch (mode) { + case DoubleToStringConverter::SHORTEST: + conv.ToShortest(value, &builder); + break; + case DoubleToStringConverter::FIXED: + conv.ToFixed(value, numDigits, &builder); + break; + default: + CHECK(mode == DoubleToStringConverter::PRECISION); + conv.ToPrecision(value, numDigits, &builder); + break; + } + const size_t length = builder.position(); + builder.Finalize(); + result->append(buffer, length); +} + +/** + * As above, but for floating point + */ +template +typename std::enable_if< + std::is_floating_point::value + && detail::IsSomeString::value>::type +toAppend(Src value, Tgt * result) { + toAppend( + value, result, double_conversion::DoubleToStringConverter::SHORTEST, 0); +} + +/** + * Variadic conversion to string. Appends each element in turn. + */ +template +typename std::enable_if= 2 + && detail::IsSomeString< + typename std::remove_pointer< + typename std::tuple_element< + sizeof...(Ts) - 1, std::tuple + >::type>::type>::value>::type +toAppend(const T& v, const Ts&... vs) { + toAppend(v, detail::getLastElement(vs...)); + toAppend(vs...); +} + +/** + * Variadic base case: do nothing. + */ +template +typename std::enable_if::value>::type +toAppend(Tgt* result) { +} + +/** + * to(v1, v2, ...) uses toAppend() (see below) as back-end + * for all types. + */ +template +typename std::enable_if::value, Tgt>::type +to(const Ts&... vs) { + Tgt result; + toAppend(vs..., &result); + return result; +} + +/******************************************************************************* + * Conversions from string types to integral types. + ******************************************************************************/ + +namespace detail { + +/** + * Finds the first non-digit in a string. The number of digits + * searched depends on the precision of the Tgt integral. Assumes the + * string starts with NO whitespace and NO sign. + * + * The semantics of the routine is: + * for (;; ++b) { + * if (b >= e || !isdigit(*b)) return b; + * } + * + * Complete unrolling marks bottom-line (i.e. entire conversion) + * improvements of 20%. + */ + template + const char* findFirstNonDigit(const char* b, const char* e) { + for (; b < e; ++b) { + auto const c = static_cast(*b) - '0'; + if (c >= 10) break; + } + return b; + } + + // Maximum value of number when represented as a string + template struct MaxString { + static const char*const value; + }; + +/** + * String represented as a pair of pointers to char to unsigned + * integrals. Assumes NO whitespace before or after, and also that the + * string is composed entirely of digits. Tgt must be unsigned, and no + * sign is allowed in the string (even it's '+'). String may be empty, + * in which case digits_to throws. 
+ */ + template + Tgt digits_to(const char * b, const char * e) { + + static_assert(!std::is_signed::value, "Unsigned type expected"); + assert(b <= e); + + const size_t size = e - b; + + /* Although the string is entirely made of digits, we still need to + * check for overflow. + */ + if (size >= std::numeric_limits::digits10 + 1) { + // Leading zeros? If so, recurse to keep things simple + if (b < e && *b == '0') { + for (++b;; ++b) { + if (b == e) return 0; // just zeros, e.g. "0000" + if (*b != '0') return digits_to(b, e); + } + } + FOLLY_RANGE_CHECK(size == std::numeric_limits::digits10 + 1 && + strncmp(b, detail::MaxString::value, size) <= 0, + "Numeric overflow upon conversion"); + } + + // Here we know that the number won't overflow when + // converted. Proceed without checks. + + static const Tgt power10[20] = { + static_cast(10000000000000000000UL), + static_cast(1000000000000000000UL), + static_cast(100000000000000000UL), + static_cast(10000000000000000UL), + static_cast(1000000000000000UL), + static_cast(100000000000000UL), + static_cast(10000000000000UL), + static_cast(1000000000000UL), + static_cast(100000000000UL), + static_cast(10000000000UL), + static_cast(1000000000UL), + static_cast(100000000UL), + static_cast(10000000UL), + static_cast(1000000UL), + static_cast(100000UL), + static_cast(10000UL), + static_cast(1000UL), + static_cast(100UL), + static_cast(10UL), + static_cast(1UL), + }; + + size_t powIdx = sizeof(power10) / sizeof(*power10) - size; + Tgt result = 0; + + for (; e - b >= 4; b += 4, powIdx += 4) { + const auto c0 = static_cast(*b) - '0'; + if (c0 >= 10) goto failure; + const auto r0 = power10[powIdx] * c0; + const auto c1 = static_cast(b[1]) - '0'; + if (c1 >= 10) goto failure; + const auto r1 = power10[powIdx + 1] * c1; + const auto c2 = static_cast(b[2]) - '0'; + if (c2 >= 10) goto failure; + const auto r2 = power10[powIdx + 2] * c2; + const auto c3 = static_cast(b[3]) - '0'; + if (c3 >= 10) goto failure; + const auto r3 = power10[powIdx + 3] * c3; + result += r0 + r1 + r2 + r3; + } + + switch (e - b) { + case 3: { + const auto c0 = static_cast(*b) - '0'; + if (c0 >= 10) goto failure; + const auto c1 = static_cast(b[1]) - '0'; + if (c1 >= 10) goto failure; + const auto c2 = static_cast(b[2]) - '0'; + if (c2 >= 10) goto failure; + return result + 100 * c0 + 10 * c1 + c2; + } + case 2: { + const auto c0 = static_cast(*b) - '0'; + if (c0 >= 10) goto failure; + const auto c1 = static_cast(b[1]) - '0'; + if (c1 >= 10) goto failure; + return result + 10 * c0 + c1; + } + case 1: { + const auto c0 = static_cast(*b) - '0'; + if (c0 >= 10) goto failure; + return result + c0; + } + } + + assert(b == e); + FOLLY_RANGE_CHECK(size > 0, "Found no digits to convert in input"); + return result; + + failure: + throw std::range_error("Cannot convert string " + + std::string(e - size, e) + " to integral."); + } + + bool str_to_bool(StringPiece * src); + +} // namespace detail + +/** + * String represented as a pair of pointers to char to unsigned + * integrals. Assumes NO whitespace before or after. + */ +template +typename std::enable_if< + std::is_integral::value && !std::is_signed::value + && !std::is_same::type, bool>::value, + Tgt>::type +to(const char * b, const char * e) { + return detail::digits_to(b, e); +} + +/** + * String represented as a pair of pointers to char to signed + * integrals. Assumes NO whitespace before or after. Allows an + * optional leading sign. 
+ */ +template +typename std::enable_if< + std::is_integral::value && std::is_signed::value, + Tgt>::type +to(const char * b, const char * e) { + FOLLY_RANGE_CHECK(b < e, "Empty input string in conversion to integral"); + if (!isdigit(*b)) { + if (*b == '-') { + Tgt result = -to::type>(b + 1, e); + FOLLY_RANGE_CHECK(result <= 0, "Negative overflow."); + return result; + } + FOLLY_RANGE_CHECK(*b == '+', "Invalid lead character"); + ++b; + } + Tgt result = to::type>(b, e); + FOLLY_RANGE_CHECK(result >= 0, "Overflow."); + return result; +} + +/** + * Parsing strings to integrals. These routines differ from + * to(string) in that they take a POINTER TO a StringPiece + * and alter that StringPiece to reflect progress information. + */ + +/** + * StringPiece to integrals, with progress information. Alters the + * StringPiece parameter to munch the already-parsed characters. + */ +template +typename std::enable_if< + std::is_integral::value + && !std::is_same::type, bool>::value, + Tgt>::type +to(StringPiece * src) { + + auto b = src->data(), past = src->data() + src->size(); + for (;; ++b) { + FOLLY_RANGE_CHECK(b < past, "No digits found in input string"); + if (!isspace(*b)) break; + } + + auto m = b; + + // First digit is customized because we test for sign + bool negative = false; + /* static */ if (std::is_signed::value) { + if (!isdigit(*m)) { + if (*m == '-') { + negative = true; + } else { + FOLLY_RANGE_CHECK(*m == '+', "Invalid leading character in conversion" + " to integral"); + } + ++b; + ++m; + } + } + FOLLY_RANGE_CHECK(m < past, "No digits found in input string"); + FOLLY_RANGE_CHECK(isdigit(*m), "Non-digit character found"); + m = detail::findFirstNonDigit(m + 1, past); + + Tgt result; + /* static */ if (!std::is_signed::value) { + result = detail::digits_to::type>(b, m); + } else { + auto t = detail::digits_to::type>(b, m); + if (negative) { + result = -t; + FOLLY_RANGE_CHECK(result <= 0, "Negative overflow"); + } else { + result = t; + FOLLY_RANGE_CHECK(result >= 0, "Overflow"); + } + } + src->advance(m - src->data()); + return result; +} + +/** + * StringPiece to bool, with progress information. Alters the + * StringPiece parameter to munch the already-parsed characters. + */ +template +typename std::enable_if< + std::is_same::type, bool>::value, + Tgt>::type +to(StringPiece * src) { + return detail::str_to_bool(src); +} + +namespace detail { + +/** + * Enforce that the suffix following a number is made up only of whitespace. + */ +inline void enforceWhitespace(const char* b, const char* e) { + for (; b != e; ++b) { + FOLLY_RANGE_CHECK(isspace(*b), to("Non-whitespace: ", *b)); + } +} + +} // namespace detail + +/** + * String or StringPiece to integrals. Accepts leading and trailing + * whitespace, but no non-space trailing characters. + */ +template +typename std::enable_if< + std::is_integral::value, + Tgt>::type +to(StringPiece src) { + Tgt result = to(&src); + detail::enforceWhitespace(src.data(), src.data() + src.size()); + return result; +} + +/******************************************************************************* + * Conversions from string types to floating-point types. + ******************************************************************************/ + +/** + * StringPiece to double, with progress information. Alters the + * StringPiece parameter to munch the already-parsed characters. 
+ */ +template +inline typename std::enable_if< + std::is_floating_point::value, + Tgt>::type +to(StringPiece *const src) { + using namespace double_conversion; + static StringToDoubleConverter + conv(StringToDoubleConverter::ALLOW_TRAILING_JUNK + | StringToDoubleConverter::ALLOW_LEADING_SPACES, + 0.0, + // return this for junk input string + std::numeric_limits::quiet_NaN(), + nullptr, nullptr); + + FOLLY_RANGE_CHECK(!src->empty(), "No digits found in input string"); + + int length; + auto result = conv.StringToDouble(src->data(), src->size(), + &length); // processed char count + + if (!std::isnan(result)) { + src->advance(length); + return result; + } + + for (;; src->advance(1)) { + if (src->empty()) { + throw std::range_error("Unable to convert an empty string" + " to a floating point value."); + } + if (!isspace(src->front())) { + break; + } + } + + // Was that "inf[inity]"? + if (src->size() >= 3 && toupper((*src)[0]) == 'I' + && toupper((*src)[1]) == 'N' && toupper((*src)[2]) == 'F') { + if (src->size() >= 8 && + toupper((*src)[3]) == 'I' && + toupper((*src)[4]) == 'N' && + toupper((*src)[5]) == 'I' && + toupper((*src)[6]) == 'T' && + toupper((*src)[7]) == 'Y') { + src->advance(8); + } else { + src->advance(3); + } + return std::numeric_limits::infinity(); + } + + // Was that "-inf[inity]"? + if (src->size() >= 4 && toupper((*src)[0]) == '-' + && toupper((*src)[1]) == 'I' && toupper((*src)[2]) == 'N' + && toupper((*src)[3]) == 'F') { + if (src->size() >= 9 && + toupper((*src)[4]) == 'I' && + toupper((*src)[5]) == 'N' && + toupper((*src)[6]) == 'I' && + toupper((*src)[7]) == 'T' && + toupper((*src)[8]) == 'Y') { + src->advance(9); + } else { + src->advance(4); + } + return -std::numeric_limits::infinity(); + } + + // "nan"? + if (src->size() >= 3 && toupper((*src)[0]) == 'N' + && toupper((*src)[1]) == 'A' && toupper((*src)[2]) == 'N') { + src->advance(3); + return std::numeric_limits::quiet_NaN(); + } + + // All bets are off + throw std::range_error("Unable to convert \"" + src->toString() + + "\" to a floating point value."); +} + +/** + * Any string, const char*, or StringPiece to double. + */ +template +typename std::enable_if< + std::is_floating_point::value, + Tgt>::type +to(StringPiece src) { + Tgt result = to(&src); + detail::enforceWhitespace(src.data(), src.data() + src.size()); + return result; +} + +/******************************************************************************* + * Integral to floating point and back + ******************************************************************************/ + +/** + * Checked conversion from integral to flating point and back. The + * result must be convertible back to the source type without loss of + * precision. This seems Draconian but sometimes is what's needed, and + * complements existing routines nicely. For various rounding + * routines, see . 
+ */ +template +typename std::enable_if< + (std::is_integral::value && std::is_floating_point::value) + || + (std::is_floating_point::value && std::is_integral::value), + Tgt>::type +to(const Src & value) { + Tgt result = value; + auto witness = static_cast(result); + if (value != witness) { + throw std::range_error( + to("to<>: loss of precision when converting ", value, + " to type ", typeid(Tgt).name()).c_str()); + } + return result; +} + +/******************************************************************************* + * Enum to anything and back + ******************************************************************************/ + +template +typename std::enable_if::value, Tgt>::type +to(const Src & value) { + // TODO: uncomment this when underlying_type is available + // return to(static_cast::type>( + // value)); + /* static */ if (Src(-1) < 0) { + /* static */ if (sizeof(Src) <= sizeof(int)) { + return to(static_cast(value)); + } else { + return to(static_cast(value)); + } + } else { + /* static */ if (sizeof(Src) <= sizeof(int)) { + return to(static_cast(value)); + } else { + return to(static_cast(value)); + } + } +} + +template +typename std::enable_if::value, Tgt>::type +to(const Src & value) { + // TODO: uncomment this when underlying_type is available + // return static_cast( + // to::type>(value)); + /* static */ if (Tgt(-1) < 0) { + /* static */ if (sizeof(Tgt) <= sizeof(int)) { + return static_cast(to(value)); + } else { + return static_cast(to(value)); + } + } else { + /* static */ if (sizeof(Tgt) <= sizeof(int)) { + return static_cast(to(value)); + } else { + return static_cast(to(value)); + } + } +} + +} // namespace folly + +// FOLLY_CONV_INTERNAL is defined by Conv.cpp. Keep the FOLLY_RANGE_CHECK +// macro for use in Conv.cpp, but #undefine it everywhere else we are included, +// to avoid defining this global macro name in other files that include Conv.h. +#ifndef FOLLY_CONV_INTERNAL +#undef FOLLY_RANGE_CHECK +#endif + +#endif /* FOLLY_BASE_CONV_H_ */ diff --git a/folly/DiscriminatedPtr.h b/folly/DiscriminatedPtr.h new file mode 100644 index 00000000..1332701d --- /dev/null +++ b/folly/DiscriminatedPtr.h @@ -0,0 +1,221 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Discriminated pointer: Type-safe pointer to one of several types. + * + * Similar to boost::variant, but has no space overhead over a raw pointer, as + * it relies on the fact that (on x86_64) there are 16 unused bits in a + * pointer. + * + * @author Tudor Bosman (tudorb@fb.com) + */ + +#ifndef FOLLY_DISCRIMINATEDPTR_H_ +#define FOLLY_DISCRIMINATEDPTR_H_ + +#include +#include +#include +#include "folly/Likely.h" +#include "folly/detail/DiscriminatedPtrDetail.h" + +#ifndef __x86_64__ +# error "DiscriminatedPtr is x64-specific code." +#endif + +namespace folly { + +/** + * Discriminated pointer. + * + * Given a list of types, a DiscriminatedPtr may point to an object + * of one of the given types, or may be empty. 
DiscriminatedPtr is type-safe: + * you may only get a pointer to the type that you put in, otherwise get + * throws an exception (and get_nothrow returns nullptr) + * + * This pointer does not do any kind of lifetime management -- it's not a + * "smart" pointer. You are responsible for deallocating any memory used + * to hold pointees, if necessary. + */ +template +class DiscriminatedPtr { + // <, not <=, as our indexes are 1-based (0 means "empty") + static_assert(sizeof...(Types) < std::numeric_limits::max(), + "too many types"); + + public: + /** + * Create an empty DiscriminatedPtr. + */ + DiscriminatedPtr() : data_(0) { + } + + /** + * Create a DiscriminatedPtr that points to an object of type T. + * Fails at compile time if T is not a valid type (listed in Types) + */ + template + explicit DiscriminatedPtr(T* ptr) { + set(ptr, typeIndex()); + } + + /** + * Set this DiscriminatedPtr to point to an object of type T. + * Fails at compile time if T is not a valid type (listed in Types) + */ + template + void set(T* ptr) { + set(ptr, typeIndex()); + } + + /** + * Get a pointer to the object that this DiscriminatedPtr points to, if it is + * of type T. Fails at compile time if T is not a valid type (listed in + * Types), and returns nullptr if this DiscriminatedPtr is empty or points to + * an object of a different type. + */ + template + T* get_nothrow() noexcept { + void* p = LIKELY(hasType()) ? ptr() : nullptr; + return static_cast(p); + } + + template + const T* get_nothrow() const noexcept { + const void* p = LIKELY(hasType()) ? ptr() : nullptr; + return static_cast(p); + } + + /** + * Get a pointer to the object that this DiscriminatedPtr points to, if it is + * of type T. Fails at compile time if T is not a valid type (listed in + * Types), and throws std::invalid_argument if this DiscriminatedPtr is empty + * or points to an object of a different type. + */ + template + T* get() { + if (UNLIKELY(!hasType())) { + throw std::invalid_argument("Invalid type"); + } + return static_cast(ptr()); + } + + template + const T* get() const { + if (UNLIKELY(!hasType())) { + throw std::invalid_argument("Invalid type"); + } + return static_cast(ptr()); + } + + /** + * Return true iff this DiscriminatedPtr is empty. + */ + bool empty() const { + return index() == 0; + } + + /** + * Return true iff the object pointed by this DiscriminatedPtr has type T, + * false otherwise. Fails at compile time if T is not a valid type (listed + * in Types...) + */ + template + bool hasType() const { + return index() == typeIndex(); + } + + /** + * Clear this DiscriminatedPtr, making it empty. + */ + void clear() { + data_ = 0; + } + + /** + * Assignment operator from a pointer of type T. + */ + template + DiscriminatedPtr& operator=(T* ptr) { + set(ptr); + return *this; + } + + /** + * Apply a visitor to this object, calling the appropriate overload for + * the type currently stored in DiscriminatedPtr. Throws invalid_argument + * if the DiscriminatedPtr is empty. + * + * The visitor must meet the following requirements: + * + * - The visitor must allow invocation as a function by overloading + * operator(), unambiguously accepting all values of type T* (or const T*) + * for all T in Types... + * - All operations of the function object on T* (or const T*) must + * return the same type (or a static_assert will fire). 
+ */ + template + typename dptr_detail::VisitorResult::type apply(V&& visitor) { + size_t n = index(); + if (n == 0) throw std::invalid_argument("Empty DiscriminatedPtr"); + return dptr_detail::ApplyVisitor()( + n, std::forward(visitor), ptr()); + } + + template + typename dptr_detail::ConstVisitorResult::type apply(V&& visitor) + const { + size_t n = index(); + if (n == 0) throw std::invalid_argument("Empty DiscriminatedPtr"); + return dptr_detail::ApplyConstVisitor()( + n, std::forward(visitor), ptr()); + } + + private: + /** + * Get the 1-based type index of T in Types. + */ + template + size_t typeIndex() const { + return dptr_detail::GetTypeIndex::value; + } + + uint16_t index() const { return data_ >> 48; } + void* ptr() const { + return reinterpret_cast(data_ & ((1ULL << 48) - 1)); + } + + void set(void* p, uint16_t v) { + uintptr_t ip = reinterpret_cast(p); + CHECK(!(ip >> 48)); + ip |= static_cast(v) << 48; + data_ = ip; + } + + /** + * We store a pointer in the least significant 48 bits of data_, and a type + * index (0 = empty, or 1-based index in Types) in the most significant 16 + * bits. We rely on the fact that pointers have their most significant 16 + * bits clear on x86_64. + */ + uintptr_t data_; +}; + +} // namespace folly + +#endif /* FOLLY_DISCRIMINATEDPTR_H_ */ + diff --git a/folly/FBString.h b/folly/FBString.h new file mode 100644 index 00000000..84b5841a --- /dev/null +++ b/folly/FBString.h @@ -0,0 +1,2284 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// @author: Andrei Alexandrescu (aalexandre) +// String type. + +#ifndef FOLLY_BASE_FBSTRING_H_ +#define FOLLY_BASE_FBSTRING_H_ + +/** + fbstring's behavior can be configured via two macro definitions, as + follows. Normally, fbstring does not write a '\0' at the end of + each string whenever it changes the underlying characters. Instead, + it lazily writes the '\0' whenever either c_str() or data() + called. + + This is standard-compliant behavior and may save costs in some + circumstances. However, it may be surprising to some client code + because c_str() and data() are const member functions (fbstring + uses the "mutable" storage class for its own state). + + In order to appease client code that expects fbstring to be + zero-terminated at all times, if the preprocessor symbol + FBSTRING_CONSERVATIVE is defined, fbstring does exactly that, + i.e. it goes the extra mile to guarantee a '\0' is always planted + at the end of its data. + + On the contrary, if the desire is to debug faulty client code that + unduly assumes the '\0' is present, fbstring plants a '^' (i.e., + emphatically NOT a zero) at the end of each string if + FBSTRING_PERVERSE is defined. (Calling c_str() or data() still + writes the '\0', of course.) + + The preprocessor symbols FBSTRING_PERVERSE and + FBSTRING_CONSERVATIVE cannot be defined simultaneously. This is + enforced during preprocessing. 
+*/ + +//#define FBSTRING_PERVERSE +//#define FBSTRING_CONSERVATIVE + +#ifdef FBSTRING_PERVERSE +#ifdef FBSTRING_CONSERVATIVE +#error Cannot define both FBSTRING_PERVERSE and FBSTRING_CONSERVATIVE. +#endif +#endif + +// This file appears in two locations: inside fbcode and in the +// libstdc++ source code (when embedding fbstring as std::string). +// To aid in this schizophrenic use, two macros are defined in +// c++config.h: +// _LIBSTDCXX_FBSTRING - Set inside libstdc++. This is useful to +// gate use inside fbcode v. libstdc++ +#include + +#ifdef _LIBSTDCXX_FBSTRING + +#pragma GCC system_header + +// Handle the cases where the fbcode version (folly/Malloc.h) is included +// either before or after this inclusion. */home/engshare/third-party/src/ +// libgcc/libgcc-4.6.2/gcc-4.6.2-20111027/libstdc++-v3/include/bits/ +// basic_string.h* has a more detailed explanation of why this is necessary. +#ifdef FOLLY_MALLOC_H_ +#undef FOLLY_MALLOC_H_ +#include "basic_fbstring_malloc.h" +#else +#include "basic_fbstring_malloc.h" +#undef FOLLY_MALLOC_H_ +#endif + +#else // !_LIBSTDCXX_FBSTRING + +#include +#include +#include + +#include "folly/Traits.h" +#include "folly/Malloc.h" +#include "folly/Hash.h" + +#endif + +#include +#include +#include + +#ifdef _LIBSTDCXX_FBSTRING +namespace std _GLIBCXX_VISIBILITY(default) { +_GLIBCXX_BEGIN_NAMESPACE_VERSION +#else +namespace folly { +#endif + +namespace fbstring_detail { + +template +inline +OutIt copy_n(InIt b, + typename std::iterator_traits::difference_type n, + OutIt d) { + for (; n != 0; --n, ++b, ++d) { + assert((const void*)&*d != &*b); + *d = *b; + } + return d; +} + +template +inline void pod_fill(Pod* b, Pod* e, T c) { + assert(b && e && b <= e); + /*static*/ if (sizeof(T) == 1) { + memset(b, c, e - b); + } else { + auto const ee = b + ((e - b) & ~7u); + for (; b != ee; b += 8) { + b[0] = c; + b[1] = c; + b[2] = c; + b[3] = c; + b[4] = c; + b[5] = c; + b[6] = c; + b[7] = c; + } + // Leftovers + for (; b != e; ++b) { + *b = c; + } + } +} + +/* + * Lightly structured memcpy, simplifies copying PODs and introduces + * some asserts + */ +template +inline Pod* pod_copy(const Pod* b, const Pod* e, Pod* d) { + assert(e >= b); + assert(d >= e || d + (e - b) <= b); + const size_t s = e - b; + std::memcpy(d, b, s * sizeof(*b)); + return d + s; +} + +/* + * Lightly structured memmove, simplifies copying PODs and introduces + * some asserts + */ +template +inline void pod_move(const Pod* b, const Pod* e, Pod* d) { + assert(e >= b); + memmove(d, b, (e - b) * sizeof(*b)); +} + +} // namespace fbstring_detail + +/** + * Defines a special acquisition method for constructing fbstring + * objects. AcquireMallocatedString means that the user passes a + * pointer to a malloc-allocated string that the fbstring object will + * take into custody. + */ +enum class AcquireMallocatedString {}; + +/* + * fbstring_core_model is a mock-up type that defines all required + * signatures of a fbstring core. The fbstring class itself uses such + * a core object to implement all of the numerous member functions + * required by the standard. + * + * If you want to define a new core, copy the definition below and + * implement the primitives. Then plug the core into basic_fbstring as + * a template argument. + +template +class fbstring_core_model { +public: + fbstring_core_model(); + fbstring_core_model(const fbstring_core_model &); + ~fbstring_core_model(); + // Returns a pointer to string's buffer (currently only contiguous + // strings are supported). 
The pointer is guaranteed to be valid + // until the next call to a non-const member function. + const Char * data() const; + // Much like data(), except the string is prepared to support + // character-level changes. This call is a signal for + // e.g. reference-counted implementation to fork the data. The + // pointer is guaranteed to be valid until the next call to a + // non-const member function. + Char * mutable_data(); + // Returns a pointer to string's buffer and guarantees that a + // readable '\0' lies right after the buffer. The pointer is + // guaranteed to be valid until the next call to a non-const member + // function. + const Char * c_str() const; + // Shrinks the string by delta characters. Asserts that delta <= + // size(). + void shrink(size_t delta); + // Expands the string by delta characters (i.e. after this call + // size() will report the old size() plus delta) but without + // initializing the expanded region. The caller is expected to fill + // the expanded area appropriately. + void expand_noinit(size_t delta); + // Expands the string by one character and sets the last character + // to c. + void push_back(Char c); + // Returns the string's size. + size_t size() const; + // Returns the string's capacity, i.e. maximum size that the string + // can grow to without reallocation. Note that for reference counted + // strings that's technically a lie - even assigning characters + // within the existing size would cause a reallocation. + size_t capacity() const; + // Returns true if the data underlying the string is actually shared + // across multiple strings (in a refcounted fashion). + bool isShared() const; + // Makes sure that at least minCapacity characters are available for + // the string without reallocation. For reference-counted strings, + // it should fork the data even if minCapacity < size(). + void reserve(size_t minCapacity); +private: + // Do not implement + fbstring_core_model& operator=(const fbstring_core_model &); +}; +*/ + +/** + * This is the core of the string. The code should work on 32- and + * 64-bit architectures and with any Char size. Porting to big endian + * architectures would require some changes. + * + * The storage is selected as follows (assuming we store one-byte + * characters on a 64-bit machine): (a) "small" strings between 0 and + * 23 chars are stored in-situ without allocation (the rightmost byte + * stores the size); (b) "medium" strings from 24 through 254 chars + * are stored in malloc-allocated memory that is copied eagerly; (c) + * "large" strings of 255 chars and above are stored in a similar + * structure as medium arrays, except that the string is + * reference-counted and copied lazily. the reference count is + * allocated right before the character array. + * + * The discriminator between these three strategies sits in the two + * most significant bits of the rightmost char of the storage. If + * neither is set, then the string is small (and its length sits in + * the lower-order bits of that rightmost character). If the MSb is + * set, the string is medium width. If the second MSb is set, then the + * string is large. + */ +template class fbstring_core { +public: + fbstring_core() { + // Only initialize the tag, will set the MSBs (i.e. 
the small + // string size) to zero too + ml_.capacity_ = maxSmallSize << (8 * (sizeof(size_t) - 1)); + // or: setSmallSize(0); + writeTerminator(); + assert(category() == isSmall && size() == 0); + } + + fbstring_core(const fbstring_core & rhs) { + assert(&rhs != this); + // Simplest case first: small strings are bitblitted + if (rhs.category() == isSmall) { + assert(offsetof(MediumLarge, data_) == 0); + assert(offsetof(MediumLarge, size_) == sizeof(ml_.data_)); + assert(offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_)); + const size_t size = rhs.smallSize(); + if (size == 0) { + ml_.capacity_ = rhs.ml_.capacity_; + writeTerminator(); + } else { + // Just write the whole thing, don't look at details. In + // particular we need to copy capacity anyway because we want + // to set the size (don't forget that the last character, + // which stores a short string's length, is shared with the + // ml_.capacity field). + ml_ = rhs.ml_; + } + assert(category() == isSmall && this->size() == rhs.size()); + } else if (rhs.category() == isLarge) { + // Large strings are just refcounted + ml_ = rhs.ml_; + RefCounted::incrementRefs(ml_.data_); + assert(category() == isLarge && size() == rhs.size()); + } else { + // Medium strings are copied eagerly. Don't forget to allocate + // one extra Char for the null terminator. + auto const allocSize = + goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char)); + ml_.data_ = static_cast(malloc(allocSize)); + fbstring_detail::pod_copy(rhs.ml_.data_, + // 1 for terminator + rhs.ml_.data_ + rhs.ml_.size_ + 1, + ml_.data_); + // No need for writeTerminator() here, we copied one extra + // element just above. + ml_.size_ = rhs.ml_.size_; + ml_.capacity_ = (allocSize / sizeof(Char) - 1) | isMedium; + assert(category() == isMedium); + } + assert(size() == rhs.size()); + assert(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0); + } + + fbstring_core(fbstring_core&& goner) { + if (goner.category() == isSmall) { + // Just copy, leave the goner in peace + new(this) fbstring_core(goner.small_, goner.smallSize()); + } else { + // Take goner's guts + ml_ = goner.ml_; + // Clean goner's carcass + goner.setSmallSize(0); + } + } + + fbstring_core(const Char *const data, const size_t size) { + // Simplest case first: small strings are bitblitted + if (size <= maxSmallSize) { + // Layout is: Char* data_, size_t size_, size_t capacity_ + /*static_*/assert(sizeof(*this) == sizeof(Char*) + 2 * sizeof(size_t)); + /*static_*/assert(sizeof(Char*) == sizeof(size_t)); + // sizeof(size_t) must be a power of 2 + /*static_*/assert((sizeof(size_t) & (sizeof(size_t) - 1)) == 0); + + // If data is aligned, use fast word-wise copying. Otherwise, + // use conservative memcpy. + if (reinterpret_cast(data) & (sizeof(size_t) - 1)) { + fbstring_detail::pod_copy(data, data + size, small_); + } else { + // Copy one word (64 bits) at a time + const size_t byteSize = size * sizeof(Char); + if (byteSize > 2 * sizeof(size_t)) { + // Copy three words + ml_.capacity_ = reinterpret_cast(data)[2]; + copyTwo: + ml_.size_ = reinterpret_cast(data)[1]; + copyOne: + ml_.data_ = *reinterpret_cast(const_cast(data)); + } else if (byteSize > sizeof(size_t)) { + // Copy two words + goto copyTwo; + } else if (size > 0) { + // Copy one word + goto copyOne; + } + } + setSmallSize(size); + } else if (size <= maxMediumSize) { + // Medium strings are allocated normally. Don't forget to + // allocate one extra Char for the terminating null. 
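+      // Note: goodMallocSize() below rounds the request up to the
+      // underlying allocator's size class, so the block usually holds a
+      // few more Chars than asked for (exact classes are
+      // allocator-specific; under jemalloc, e.g., a 25-byte request
+      // would likely be served from the 32-byte bin). The extra room is
+      // folded into capacity() rather than wasted.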
+ auto const allocSize = goodMallocSize((1 + size) * sizeof(Char)); + ml_.data_ = static_cast(malloc(allocSize)); + fbstring_detail::pod_copy(data, data + size, ml_.data_); + ml_.size_ = size; + ml_.capacity_ = (allocSize / sizeof(Char) - 1) | isMedium; + } else { + // Large strings are allocated differently + size_t effectiveCapacity = size; + auto const newRC = RefCounted::create(data, & effectiveCapacity); + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.capacity_ = effectiveCapacity | isLarge; + } + writeTerminator(); + assert(this->size() == size); + assert(memcmp(this->data(), data, size * sizeof(Char)) == 0); + } + + ~fbstring_core() { + auto const c = category(); + if (c == isSmall) { + return; + } + if (c == isMedium) { + free(ml_.data_); + return; + } + RefCounted::decrementRefs(ml_.data_); + } + + // Snatches a previously mallocated string. The parameter "size" + // is the size of the string, and the parameter "capacity" is the size + // of the mallocated block. The string must be \0-terminated, so + // data[size] == '\0' and capacity >= size + 1. + // + // So if you want a 2-character string, pass malloc(3) as "data", pass 2 as + // "size", and pass 3 as "capacity". + fbstring_core(Char *const data, const size_t size, + const size_t capacity, + AcquireMallocatedString) { + if (size > 0) { + assert(capacity > size); + assert(data[size] == '\0'); + // Use the medium string storage + ml_.data_ = data; + ml_.size_ = size; + ml_.capacity_ = capacity | isMedium; + } else { + // No need for the memory + free(data); + setSmallSize(0); + } + } + + // swap below doesn't test whether &rhs == this (and instead + // potentially does extra work) on the premise that the rarity of + // that situation actually makes the check more expensive than is + // worth. + void swap(fbstring_core & rhs) { + auto const t = ml_; + ml_ = rhs.ml_; + rhs.ml_ = t; + } + + // In C++11 data() and c_str() are 100% equivalent. + const Char * data() const { + return c_str(); + } + + Char * mutable_data() { + auto const c = category(); + if (c == isSmall) { + return small_; + } + assert(c == isMedium || c == isLarge); + if (c == isLarge && RefCounted::refs(ml_.data_) > 1) { + // Ensure unique. + size_t effectiveCapacity = ml_.capacity(); + auto const newRC = RefCounted::create(& effectiveCapacity); + // If this fails, someone placed the wrong capacity in an + // fbstring. + assert(effectiveCapacity >= ml_.capacity()); + fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, + newRC->data_); + RefCounted::decrementRefs(ml_.data_); + ml_.data_ = newRC->data_; + // No need to call writeTerminator(), we have + 1 above. 
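+      // Copy-on-write in action (a usage sketch; the length is
+      // illustrative, anything above maxMediumSize is refcounted):
+      //
+      //   fbstring a(300, 'y');  // large category, refcounted
+      //   fbstring b = a;        // O(1): just bumps the shared refcount
+      //   b[0] = 'x';            // non-const access reaches this code
+      //                          // and forks the buffer; a is unchanged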
+ } + return ml_.data_; + } + + const Char * c_str() const { + auto const c = category(); +#ifdef FBSTRING_PERVERSE + if (c == isSmall) { + assert(small_[smallSize()] == TERMINATOR || smallSize() == maxSmallSize + || small_[smallSize()] == '\0'); + small_[smallSize()] = '\0'; + return small_; + } + assert(c == isMedium || c == isLarge); + assert(ml_.data_[ml_.size_] == TERMINATOR || ml_.data_[ml_.size_] == '\0'); + ml_.data_[ml_.size_] = '\0'; +#elif defined(FBSTRING_CONSERVATIVE) + if (c == isSmall) { + assert(small_[smallSize()] == '\0'); + return small_; + } + assert(c == isMedium || c == isLarge); + assert(ml_.data_[ml_.size_] == '\0'); +#else + if (c == isSmall) { + small_[smallSize()] = '\0'; + return small_; + } + assert(c == isMedium || c == isLarge); + ml_.data_[ml_.size_] = '\0'; +#endif + return ml_.data_; + } + + void shrink(const size_t delta) { + if (category() == isSmall) { + // Check for underflow + assert(delta <= smallSize()); + setSmallSize(smallSize() - delta); + } else if (category() == isMedium || RefCounted::refs(ml_.data_) == 1) { + // Medium strings and unique large strings need no special + // handling. + assert(ml_.size_ >= delta); + ml_.size_ -= delta; + } else { + assert(ml_.size_ >= delta); + // Shared large string, must make unique. This is because of the + // durn terminator must be written, which may trample the shared + // data. + if (delta) { + fbstring_core(ml_.data_, ml_.size_ - delta).swap(*this); + } + // No need to write the terminator. + return; + } + writeTerminator(); + } + + void reserve(size_t minCapacity) { + if (category() == isLarge) { + // Ensure unique + if (RefCounted::refs(ml_.data_) > 1) { + // We must make it unique regardless; in-place reallocation is + // useless if the string is shared. In order to not surprise + // people, reserve the new block at current capacity or + // more. That way, a string's capacity never shrinks after a + // call to reserve. + minCapacity = std::max(minCapacity, ml_.capacity()); + auto const newRC = RefCounted::create(& minCapacity); + fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_ + 1, + newRC->data_); + // Done with the old data. No need to call writeTerminator(), + // we have + 1 above. + RefCounted::decrementRefs(ml_.data_); + ml_.data_ = newRC->data_; + ml_.capacity_ = minCapacity | isLarge; + // size remains unchanged + } else { + // String is not shared, so let's try to realloc (if needed) + if (minCapacity > ml_.capacity()) { + // Asking for more memory + auto const newRC = + RefCounted::reallocate(ml_.data_, ml_.size_, + ml_.capacity(), minCapacity); + ml_.data_ = newRC->data_; + ml_.capacity_ = minCapacity | isLarge; + writeTerminator(); + } + assert(capacity() >= minCapacity); + } + } else if (category() == isMedium) { + // String is not shared + if (minCapacity <= ml_.capacity()) { + return; // nothing to do, there's enough room + } + if (minCapacity <= maxMediumSize) { + // Keep the string at medium size. Don't forget to allocate + // one extra Char for the terminating null. 
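+      // smartRealloc() (folly/Malloc.h) is handed the live size as well
+      // as both capacities, so when it cannot grow the block in place it
+      // moves only the bytes actually in use rather than the whole old
+      // capacity.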
+ size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); + ml_.data_ = static_cast( + smartRealloc( + ml_.data_, + ml_.size_ * sizeof(Char), + ml_.capacity() * sizeof(Char), + capacityBytes)); + writeTerminator(); + ml_.capacity_ = (capacityBytes / sizeof(Char) - 1) | isMedium; + } else { + // Conversion from medium to large string + fbstring_core nascent; + // Will recurse to another branch of this function + nascent.reserve(minCapacity); + nascent.ml_.size_ = ml_.size_; + fbstring_detail::pod_copy(ml_.data_, ml_.data_ + ml_.size_, + nascent.ml_.data_); + nascent.swap(*this); + writeTerminator(); + assert(capacity() >= minCapacity); + } + } else { + assert(category() == isSmall); + if (minCapacity > maxMediumSize) { + // large + auto const newRC = RefCounted::create(& minCapacity); + auto const size = smallSize(); + fbstring_detail::pod_copy(small_, small_ + size + 1, newRC->data_); + // No need for writeTerminator(), we wrote it above with + 1. + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.capacity_ = minCapacity | isLarge; + assert(capacity() >= minCapacity); + } else if (minCapacity > maxSmallSize) { + // medium + // Don't forget to allocate one extra Char for the terminating null + auto const allocSizeBytes = + goodMallocSize((1 + minCapacity) * sizeof(Char)); + auto const data = static_cast(malloc(allocSizeBytes)); + auto const size = smallSize(); + fbstring_detail::pod_copy(small_, small_ + size + 1, data); + // No need for writeTerminator(), we wrote it above with + 1. + ml_.data_ = data; + ml_.size_ = size; + ml_.capacity_ = (allocSizeBytes / sizeof(Char) - 1) | isMedium; + } else { + // small + // Nothing to do, everything stays put + } + } + assert(capacity() >= minCapacity); + } + + void expand_noinit(const size_t delta) { + // Strategy is simple: make room, then change size + assert(capacity() >= size()); + size_t sz, newSz, cp; + if (category() == isSmall) { + sz = smallSize(); + newSz = sz + delta; + if (newSz <= maxSmallSize) { + setSmallSize(newSz); + writeTerminator(); + return; + } + cp = maxSmallSize; + } else { + sz = ml_.size_; + newSz = sz + delta; + cp = capacity(); + } + if (newSz > cp) reserve(newSz); + assert(capacity() >= newSz); + // Category can't be small - we took care of that above + assert(category() == isMedium || category() == isLarge); + ml_.size_ = newSz; + writeTerminator(); + assert(size() == newSz); + } + + void push_back(Char c) { + assert(capacity() >= size()); + size_t sz, cp; + if (category() == isSmall) { + sz = smallSize(); + if (sz < maxSmallSize) { + setSmallSize(sz + 1); + small_[sz] = c; + writeTerminator(); + return; + } + reserve(maxSmallSize * 3 / 2); + } else { + sz = ml_.size_; + cp = ml_.capacity(); + if (sz == cp) reserve(cp * 3 / 2); + } + assert(capacity() >= sz + 1); + // Category can't be small - we took care of that above + assert(category() == isMedium || category() == isLarge); + ml_.size_ = sz + 1; + mutable_data()[sz] = c; + writeTerminator(); + } + + size_t size() const { + return category() == isSmall ? smallSize() : ml_.size_; + } + + size_t capacity() const { + switch (category()) { + case isSmall: + return maxSmallSize; + case isLarge: + // For large-sized strings, a multi-referenced chunk has no + // available capacity. This is because any attempt to append + // data would trigger a new allocation. 
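+      // Illustration (hypothetical values): after
+      //   fbstring a(300, 'x');
+      //   fbstring b = a;        // refcount is now 2
+      // both a.capacity() and b.capacity() report just size(), since
+      // appending to either one would have to fork the buffer anyway.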
+ if (RefCounted::refs(ml_.data_) > 1) return ml_.size_; + default: {} + } + return ml_.capacity(); + } + + bool isShared() const { + return category() == isLarge && RefCounted::refs(ml_.data_) > 1; + } + +#ifdef FBSTRING_PERVERSE + enum { TERMINATOR = '^' }; +#else + enum { TERMINATOR = '\0' }; +#endif + + void writeTerminator() { +#if defined(FBSTRING_PERVERSE) || defined(FBSTRING_CONSERVATIVE) + if (category() == isSmall) { + const auto s = smallSize(); + if (s != maxSmallSize) { + small_[s] = TERMINATOR; + } + } else { + ml_.data_[ml_.size_] = TERMINATOR; + } +#endif + } + +private: + // Disabled + fbstring_core & operator=(const fbstring_core & rhs); + + struct MediumLarge { + Char * data_; + size_t size_; + size_t capacity_; + + size_t capacity() const { + return capacity_ & capacityExtractMask; + } + }; + + struct RefCounted { + std::atomic refCount_; + Char data_[1]; + + static RefCounted * fromData(Char * p) { + return static_cast( + static_cast( + static_cast(static_cast(p)) + - offsetof(RefCounted, data_))); + } + + static size_t refs(Char * p) { + return fromData(p)->refCount_.load(std::memory_order_acquire); + } + + static void incrementRefs(Char * p) { + fromData(p)->refCount_.fetch_add(1, std::memory_order_acq_rel); + } + + static void decrementRefs(Char * p) { + auto const dis = fromData(p); + size_t oldcnt = dis->refCount_.fetch_sub(1, std::memory_order_acq_rel); + assert(oldcnt > 0); + if (oldcnt == 1) { + free(dis); + } + } + + static RefCounted * create(size_t * size) { + // Don't forget to allocate one extra Char for the terminating + // null. In this case, however, one Char is already part of the + // struct. + const size_t allocSize = goodMallocSize( + sizeof(RefCounted) + *size * sizeof(Char)); + auto result = static_cast(malloc(allocSize)); + result->refCount_.store(1, std::memory_order_release); + *size = (allocSize - sizeof(RefCounted)) / sizeof(Char); + return result; + } + + static RefCounted * create(const Char * data, size_t * size) { + const size_t effectiveSize = *size; + auto result = create(size); + fbstring_detail::pod_copy(data, data + effectiveSize, result->data_); + return result; + } + + static RefCounted * reallocate(Char *const data, + const size_t currentSize, + const size_t currentCapacity, + const size_t newCapacity) { + assert(newCapacity > 0 && newCapacity > currentSize); + auto const dis = fromData(data); + assert(dis->refCount_.load(std::memory_order_acquire) == 1); + // Don't forget to allocate one extra Char for the terminating + // null. In this case, however, one Char is already part of the + // struct. + auto result = static_cast( + smartRealloc(dis, + sizeof(RefCounted) + currentSize * sizeof(Char), + sizeof(RefCounted) + currentCapacity * sizeof(Char), + sizeof(RefCounted) + newCapacity * sizeof(Char))); + assert(result->refCount_.load(std::memory_order_acquire) == 1); + return result; + } + }; + + union { + mutable Char small_[sizeof(MediumLarge) / sizeof(Char)]; + mutable MediumLarge ml_; + }; + + enum { + lastChar = sizeof(MediumLarge) - 1, + maxSmallSize = lastChar / sizeof(Char), + maxMediumSize = 254 / sizeof(Char), // coincides with the small + // bin size in dlmalloc + categoryExtractMask = sizeof(size_t) == 4 ? 0xC0000000 : 0xC000000000000000, + capacityExtractMask = ~categoryExtractMask, + }; + static_assert(!(sizeof(MediumLarge) % sizeof(Char)), + "Corrupt memory layout for fbstring."); + + enum Category { + isSmall = 0, + isMedium = sizeof(size_t) == 4 ? 0x80000000 : 0x8000000000000000, + isLarge = sizeof(size_t) == 4 ? 
0x40000000 : 0x4000000000000000, + }; + + Category category() const { + // Assumes little endian + return static_cast(ml_.capacity_ & categoryExtractMask); + } + + size_t smallSize() const { + assert(category() == isSmall && small_[maxSmallSize] <= maxSmallSize); + return static_cast(maxSmallSize) + - static_cast(small_[maxSmallSize]); + } + + void setSmallSize(size_t s) { + // Warning: this should work with uninitialized strings too, + // so don't assume anything about the previous value of + // small_[maxSmallSize]. + assert(s <= maxSmallSize); + small_[maxSmallSize] = maxSmallSize - s; + } +}; + +#ifndef _LIBSTDCXX_FBSTRING +/** + * Dummy fbstring core that uses an actual std::string. This doesn't + * make any sense - it's just for testing purposes. + */ +template +class dummy_fbstring_core { +public: + dummy_fbstring_core() { + } + dummy_fbstring_core(const dummy_fbstring_core& another) + : backend_(another.backend_) { + } + dummy_fbstring_core(const Char * s, size_t n) + : backend_(s, n) { + } + void swap(dummy_fbstring_core & rhs) { + backend_.swap(rhs.backend_); + } + const Char * data() const { + return backend_.data(); + } + Char * mutable_data() { + //assert(!backend_.empty()); + return &*backend_.begin(); + } + void shrink(size_t delta) { + assert(delta <= size()); + backend_.resize(size() - delta); + } + void expand_noinit(size_t delta) { + backend_.resize(size() + delta); + } + void push_back(Char c) { + backend_.push_back(c); + } + size_t size() const { + return backend_.size(); + } + size_t capacity() const { + return backend_.capacity(); + } + bool isShared() const { + return false; + } + void reserve(size_t minCapacity) { + backend_.reserve(minCapacity); + } + +private: + std::basic_string backend_; +}; +#endif // !_LIBSTDCXX_FBSTRING + +/** + * This is the basic_string replacement. For conformity, + * basic_fbstring takes the same template parameters, plus the last + * one which is the core. 
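+ *
+ * As an illustration (the typedef name below is hypothetical), the
+ * testing core above can be plugged in like so:
+ *
+ *   typedef basic_fbstring<char, std::char_traits<char>,
+ *                          std::allocator<char>,
+ *                          dummy_fbstring_core<char> > std_backed_fbstring;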
+ */ +#ifdef _LIBSTDCXX_FBSTRING +template +#else +template , + class A = std::allocator, + class Storage = fbstring_core > +#endif +class basic_fbstring { + + static void enforce( + bool condition, + void (*throw_exc)(const char*), + const char* msg) { + if (!condition) throw_exc(msg); + } + + bool isSane() const { + return + begin() <= end() && + empty() == (size() == 0) && + empty() == (begin() == end()) && + size() <= max_size() && + capacity() <= max_size() && + size() <= capacity() && + (begin()[size()] == Storage::TERMINATOR || begin()[size()] == '\0'); + } + + struct Invariant; + friend struct Invariant; + struct Invariant { +#ifndef NDEBUG + explicit Invariant(const basic_fbstring& s) : s_(s) { + assert(s_.isSane()); + } + ~Invariant() { + assert(s_.isSane()); + } + private: + const basic_fbstring& s_; +#else + explicit Invariant(const basic_fbstring&) {} +#endif + Invariant& operator=(const Invariant&); + }; + +public: + // types + typedef T traits_type; + typedef typename traits_type::char_type value_type; + typedef A allocator_type; + typedef typename A::size_type size_type; + typedef typename A::difference_type difference_type; + + typedef typename A::reference reference; + typedef typename A::const_reference const_reference; + typedef typename A::pointer pointer; + typedef typename A::const_pointer const_pointer; + + typedef E* iterator; + typedef const E* const_iterator; + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; + + static const size_type npos; // = size_type(-1) + +private: + static void procrustes(size_type& n, size_type nmax) { + if (n > nmax) n = nmax; + } + +public: + // 21.3.1 construct/copy/destroy + explicit basic_fbstring(const A& a = A()) { + } + + basic_fbstring(const basic_fbstring& str) + : store_(str.store_) { + } + + // Move constructor + basic_fbstring(basic_fbstring&& goner) : store_(std::move(goner.store_)) { + } + +#ifndef _LIBSTDCXX_FBSTRING + // This is defined for compatibility with std::string + /* implicit */ basic_fbstring(const std::string& str) + : store_(str.data(), str.size()) { + } +#endif + + basic_fbstring(const basic_fbstring& str, size_type pos, + size_type n = npos, const A& a = A()) { + assign(str, pos, n); + } + + /* implicit */ basic_fbstring(const value_type* s, const A& a = A()) + : store_(s, s ? 
traits_type::length(s) : ({ + basic_fbstring err = __PRETTY_FUNCTION__; + err += ": null pointer initializer not valid"; + std::__throw_logic_error(err.c_str()); + 0; + })) { + } + + basic_fbstring(const value_type* s, size_type n, const A& a = A()) + : store_(s, n) { + } + + basic_fbstring(size_type n, value_type c, const A& a = A()) { + store_.expand_noinit(n); + auto const data = store_.mutable_data(); + fbstring_detail::pod_fill(data, data + n, c); + store_.writeTerminator(); + } + + template + basic_fbstring(InIt begin, InIt end, + typename std::enable_if< + !std::is_same::type, + value_type*>::value, const A>::type & a = A()) { + assign(begin, end); + } + + // Specialization for const char*, const char* + basic_fbstring(const value_type* b, const value_type* e) + : store_(b, e - b) { + } + + // Nonstandard constructor + basic_fbstring(value_type *s, size_type n, size_type c, + AcquireMallocatedString a) + : store_(s, n, c, a) { + } + + ~basic_fbstring() { + } + + basic_fbstring& operator=(const basic_fbstring & lhs) { + if (&lhs == this) { + return *this; + } + auto const oldSize = size(); + auto const srcSize = lhs.size(); + if (capacity() >= srcSize && !store_.isShared()) { + // great, just copy the contents + if (oldSize < srcSize) + store_.expand_noinit(srcSize - oldSize); + else + store_.shrink(oldSize - srcSize); + assert(size() == srcSize); + fbstring_detail::pod_copy(lhs.begin(), lhs.end(), begin()); + store_.writeTerminator(); + } else { + // need to reallocate, so we may as well create a brand new string + basic_fbstring(lhs).swap(*this); + } + return *this; + } + + // Move assignment + basic_fbstring& operator=(basic_fbstring&& goner) { + // No need of this anymore + this->~basic_fbstring(); + // Move the goner into this + new(&store_) fbstring_core(std::move(goner.store_)); + return *this; + } + +#ifndef _LIBSTDCXX_FBSTRING + // Compatibility with std::string + basic_fbstring & operator=(const std::string & rhs) { + return assign(rhs.data(), rhs.size()); + } + + // Compatibility with std::string + std::string toStdString() const { + return std::string(data(), size()); + } +#else + // A lot of code in fbcode still uses this method, so keep it here for now. + const basic_fbstring& toStdString() const { + return *this; + } +#endif + + basic_fbstring& operator=(const value_type* s) { + return assign(s); + } + + basic_fbstring& operator=(value_type c) { + if (empty()) { + store_.expand_noinit(1); + } else if (store_.isShared()) { + basic_fbstring(1, c).swap(*this); + return *this; + } else { + store_.shrink(size() - 1); + } + *store_.mutable_data() = c; + store_.writeTerminator(); + return *this; + } + + // 21.3.2 iterators: + iterator begin() { return store_.mutable_data(); } + + const_iterator begin() const { return store_.data(); } + + iterator end() { + return store_.mutable_data() + store_.size(); + } + + const_iterator end() const { + return store_.data() + store_.size(); + } + + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + + reverse_iterator rend() { + return reverse_iterator(begin()); + } + + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + // Non-standard functions. They intentionally return by value to + // reduce pressure on the reference counting mechanism. 
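+  // E.g. `char c = s.back();` reads the last character through the
+  // const path; a non-const, reference-returning back() would have to
+  // go through mutable_data() and unshare a refcounted buffer first.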
+ value_type front() const { return *begin(); } + value_type back() const { + assert(!empty()); + return begin()[size() - 1]; + } + void pop_back() { assert(!empty()); store_.shrink(1); } + + // 21.3.3 capacity: + size_type size() const { return store_.size(); } + + size_type length() const { return size(); } + + size_type max_size() const { + return std::numeric_limits::max(); + } + + void resize(const size_type n, const value_type c = value_type()) { + auto size = this->size(); + if (n <= size) { + store_.shrink(size - n); + } else { + // Do this in two steps to minimize slack memory copied (see + // smartRealloc). + auto const capacity = this->capacity(); + assert(capacity >= size); + if (size < capacity) { + auto delta = std::min(n, capacity) - size; + store_.expand_noinit(delta); + fbstring_detail::pod_fill(begin() + size, end(), c); + size += delta; + if (size == n) { + store_.writeTerminator(); + return; + } + assert(size < n); + } + auto const delta = n - size; + store_.expand_noinit(delta); + fbstring_detail::pod_fill(end() - delta, end(), c); + store_.writeTerminator(); + } + assert(this->size() == n); + } + + size_type capacity() const { return store_.capacity(); } + + void reserve(size_type res_arg = 0) { + enforce(res_arg <= max_size(), std::__throw_length_error, ""); + store_.reserve(res_arg); + } + + void clear() { resize(0); } + + bool empty() const { return size() == 0; } + + // 21.3.4 element access: + const_reference operator[](size_type pos) const { + return *(c_str() + pos); + } + + reference operator[](size_type pos) { + if (pos == size()) { + // Just call c_str() to make sure '\0' is present + c_str(); + } + return *(begin() + pos); + } + + const_reference at(size_type n) const { + enforce(n <= size(), std::__throw_out_of_range, ""); + return (*this)[n]; + } + + reference at(size_type n) { + enforce(n < size(), std::__throw_out_of_range, ""); + return (*this)[n]; + } + + // 21.3.5 modifiers: + basic_fbstring& operator+=(const basic_fbstring& str) { + return append(str); + } + + basic_fbstring& operator+=(const value_type* s) { + return append(s); + } + + basic_fbstring& operator+=(const value_type c) { + push_back(c); + return *this; + } + + basic_fbstring& append(const basic_fbstring& str) { +#ifndef NDEBUG + auto desiredSize = size() + str.size(); +#endif + append(str.data(), str.size()); + assert(size() == desiredSize); + return *this; + } + + basic_fbstring& append(const basic_fbstring& str, const size_type pos, + size_type n) { + const size_type sz = str.size(); + enforce(pos <= sz, std::__throw_out_of_range, ""); + procrustes(n, sz - pos); + return append(str.data() + pos, n); + } + + basic_fbstring& append(const value_type* s, const size_type n) { +#ifndef NDEBUG + auto oldSize = size(); +#endif + Invariant checker(*this); + (void) checker; + static std::less_equal le; + if (le(data(), s) && !le(data() + size(), s)) {// aliasing + assert(le(s + n, data() + size())); + const size_type offset = s - data(); + store_.reserve(size() + n); + // Restore the source + s = data() + offset; + } + store_.expand_noinit(n); + fbstring_detail::pod_copy(s, s + n, end() - n); + store_.writeTerminator(); + assert(size() == oldSize + n); + return *this; + } + + basic_fbstring& append(const value_type* s) { + return append(s, traits_type::length(s)); + } + + basic_fbstring& append(size_type n, value_type c) { + resize(size() + n, c); + return *this; + } + + template + basic_fbstring& append(InputIterator first, InputIterator last) { + insert(end(), first, last); + return *this; + 
} + + void push_back(const value_type c) { // primitive + store_.push_back(c); + } + + basic_fbstring& assign(const basic_fbstring& str) { + if (&str == this) return *this; + return assign(str.data(), str.size()); + } + + basic_fbstring& assign(const basic_fbstring& str, const size_type pos, + size_type n) { + const size_type sz = str.size(); + enforce(pos <= sz, std::__throw_out_of_range, ""); + procrustes(n, sz - pos); + return assign(str.data() + pos, n); + } + + basic_fbstring& assign(const value_type* s, const size_type n) { + Invariant checker(*this); + (void) checker; + if (size() >= n) { + std::copy(s, s + n, begin()); + resize(n); + assert(size() == n); + } else { + const value_type *const s2 = s + size(); + std::copy(s, s2, begin()); + append(s2, n - size()); + assert(size() == n); + } + store_.writeTerminator(); + assert(size() == n); + return *this; + } + + basic_fbstring& assign(const value_type* s) { + return assign(s, traits_type::length(s)); + } + + template + basic_fbstring& assign(ItOrLength first_or_n, ItOrChar last_or_c) { + return replace(begin(), end(), first_or_n, last_or_c); + } + + basic_fbstring& insert(size_type pos1, const basic_fbstring& str) { + return insert(pos1, str.data(), str.size()); + } + + basic_fbstring& insert(size_type pos1, const basic_fbstring& str, + size_type pos2, size_type n) { + enforce(pos2 <= str.length(), std::__throw_out_of_range, ""); + procrustes(n, str.length() - pos2); + return insert(pos1, str.data() + pos2, n); + } + + basic_fbstring& insert(size_type pos, const value_type* s, size_type n) { + enforce(pos <= length(), std::__throw_out_of_range, ""); + insert(begin() + pos, s, s + n); + return *this; + } + + basic_fbstring& insert(size_type pos, const value_type* s) { + return insert(pos, s, traits_type::length(s)); + } + + basic_fbstring& insert(size_type pos, size_type n, value_type c) { + enforce(pos <= length(), std::__throw_out_of_range, ""); + insert(begin() + pos, n, c); + return *this; + } + + iterator insert(const iterator p, const value_type c) { + const size_type pos = p - begin(); + insert(p, 1, c); + return begin() + pos; + } + +private: + template class Selector {}; + + basic_fbstring& insertImplDiscr(iterator p, + size_type n, value_type c, Selector<1>) { + Invariant checker(*this); + (void) checker; + assert(p >= begin() && p <= end()); + if (capacity() - size() < n) { + const size_type sz = p - begin(); + reserve(size() + n); + p = begin() + sz; + } + const iterator oldEnd = end(); + if( n < size_type(oldEnd - p)) { + append(oldEnd - n, oldEnd); + //std::copy( + // reverse_iterator(oldEnd - n), + // reverse_iterator(p), + // reverse_iterator(oldEnd)); + fbstring_detail::pod_move(&*p, &*oldEnd - n, &*p + n); + std::fill(p, p + n, c); + } else { + append(n - (end() - p), c); + append(p, oldEnd); + std::fill(p, oldEnd, c); + } + store_.writeTerminator(); + return *this; + } + + template + basic_fbstring& insertImplDiscr(iterator i, + InputIter b, InputIter e, Selector<0>) { + insertImpl(i, b, e, + typename std::iterator_traits::iterator_category()); + return *this; + } + + template + void insertImpl(iterator i, + FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { + Invariant checker(*this); + (void) checker; + const size_type pos = i - begin(); + const typename std::iterator_traits::difference_type n2 = + std::distance(s1, s2); + assert(n2 >= 0); + using namespace fbstring_detail; + assert(pos <= size()); + + const typename std::iterator_traits::difference_type maxn2 = + capacity() - size(); + if (maxn2 < n2) 
{
+      // realloc the string
+      reserve(size() + n2);
+      i = begin() + pos;
+    }
+    if (pos + n2 <= size()) {
+      const iterator tailBegin = end() - n2;
+      store_.expand_noinit(n2);
+      fbstring_detail::pod_copy(tailBegin, tailBegin + n2, end() - n2);
+      std::copy(reverse_iterator(tailBegin), reverse_iterator(i),
+                reverse_iterator(tailBegin + n2));
+      std::copy(s1, s2, i);
+    } else {
+      FwdIterator t = s1;
+      const size_type old_size = size();
+      std::advance(t, old_size - pos);
+      const size_t newElems = std::distance(t, s2);
+      store_.expand_noinit(n2);
+      std::copy(t, s2, begin() + old_size);
+      fbstring_detail::pod_copy(data() + pos, data() + old_size,
+                                begin() + old_size + newElems);
+      std::copy(s1, t, i);
+    }
+    store_.writeTerminator();
+  }
+
+  template <class InputIterator>
+  void insertImpl(iterator i,
+                  InputIterator b, InputIterator e, std::input_iterator_tag) {
+    basic_fbstring temp(begin(), i);
+    for (; b != e; ++b) {
+      temp.push_back(*b);
+    }
+    temp.append(i, end());
+    swap(temp);
+  }
+
+public:
+  template <class ItOrLength, class ItOrChar>
+  void insert(iterator p, ItOrLength first_or_n, ItOrChar last_or_c) {
+    Selector<std::numeric_limits<ItOrLength>::is_specialized> sel;
+    insertImplDiscr(p, first_or_n, last_or_c, sel);
+  }
+
+  basic_fbstring& erase(size_type pos = 0, size_type n = npos) {
+    Invariant checker(*this);
+    (void) checker;
+    enforce(pos <= length(), std::__throw_out_of_range, "");
+    procrustes(n, length() - pos);
+    std::copy(begin() + pos + n, end(), begin() + pos);
+    resize(length() - n);
+    return *this;
+  }
+
+  iterator erase(iterator position) {
+    const size_type pos(position - begin());
+    enforce(pos <= size(), std::__throw_out_of_range, "");
+    erase(pos, 1);
+    return begin() + pos;
+  }
+
+  iterator erase(iterator first, iterator last) {
+    const size_type pos(first - begin());
+    erase(pos, last - first);
+    return begin() + pos;
+  }
+
+  // Replaces at most n1 chars of *this, starting with pos1 with the
+  // content of str
+  basic_fbstring& replace(size_type pos1, size_type n1,
+                          const basic_fbstring& str) {
+    return replace(pos1, n1, str.data(), str.size());
+  }
+
+  // Replaces at most n1 chars of *this, starting with pos1,
+  // with at most n2 chars of str starting with pos2
+  basic_fbstring& replace(size_type pos1, size_type n1,
+                          const basic_fbstring& str,
+                          size_type pos2, size_type n2) {
+    enforce(pos2 <= str.length(), std::__throw_out_of_range, "");
+    return replace(pos1, n1, str.data() + pos2,
+                   std::min(n2, str.size() - pos2));
+  }
+
+  // Replaces at most n1 chars of *this, starting with pos, with chars from s
+  basic_fbstring& replace(size_type pos, size_type n1, const value_type* s) {
+    return replace(pos, n1, s, traits_type::length(s));
+  }
+
+  // Replaces at most n1 chars of *this, starting with pos, with n2
+  // occurrences of c
+  //
+  // consolidated with
+  //
+  // Replaces at most n1 chars of *this, starting with pos, with at
+  // most n2 chars of str. str must have at least n2 chars.
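+  // E.g. both of these calls land on the template below and are then
+  // told apart by Selector<> on whether the third argument is numeric:
+  //
+  //   s.replace(0, 2, size_type(5), '!');  // five copies of '!'
+  //   s.replace(0, 2, "abcde", 3);         // first three chars of "abcde"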
+ template + basic_fbstring& replace(size_type pos, size_type n1, + StrOrLength s_or_n2, NumOrChar n_or_c) { + Invariant checker(*this); + (void) checker; + enforce(pos <= size(), std::__throw_out_of_range, ""); + procrustes(n1, length() - pos); + const iterator b = begin() + pos; + return replace(b, b + n1, s_or_n2, n_or_c); + } + + basic_fbstring& replace(iterator i1, iterator i2, const basic_fbstring& str) { + return replace(i1, i2, str.data(), str.length()); + } + + basic_fbstring& replace(iterator i1, iterator i2, const value_type* s) { + return replace(i1, i2, s, traits_type::length(s)); + } + +private: + basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, + const value_type* s, size_type n, + Selector<2>) { + assert(i1 <= i2); + assert(begin() <= i1 && i1 <= end()); + assert(begin() <= i2 && i2 <= end()); + return replace(i1, i2, s, s + n); + } + + basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, + size_type n2, value_type c, Selector<1>) { + const size_type n1 = i2 - i1; + if (n1 > n2) { + std::fill(i1, i1 + n2, c); + erase(i1 + n2, i2); + } else { + std::fill(i1, i2, c); + insert(i2, n2 - n1, c); + } + assert(isSane()); + return *this; + } + + template + basic_fbstring& replaceImplDiscr(iterator i1, iterator i2, + InputIter b, InputIter e, + Selector<0>) { + replaceImpl(i1, i2, b, e, + typename std::iterator_traits::iterator_category()); + return *this; + } + +private: + template + bool replaceAliased(iterator i1, iterator i2, + FwdIterator s1, FwdIterator s2, P*) { + return false; + } + + template + bool replaceAliased(iterator i1, iterator i2, + FwdIterator s1, FwdIterator s2, value_type*) { + static const std::less_equal le = + std::less_equal(); + const bool aliased = le(&*begin(), &*s1) && le(&*s1, &*end()); + if (!aliased) { + return false; + } + // Aliased replace, copy to new string + basic_fbstring temp; + temp.reserve(size() - (i2 - i1) + std::distance(s1, s2)); + temp.append(begin(), i1).append(s1, s2).append(i2, end()); + swap(temp); + return true; + } + +public: + template + void replaceImpl(iterator i1, iterator i2, + FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { + Invariant checker(*this); + (void) checker; + + // Handle aliased replace + if (replaceAliased(i1, i2, s1, s2, &*s1)) { + return; + } + + auto const n1 = i2 - i1; + assert(n1 >= 0); + auto const n2 = std::distance(s1, s2); + assert(n2 >= 0); + + if (n1 > n2) { + // shrinks + std::copy(s1, s2, i1); + erase(i1 + n2, i2); + } else { + // grows + fbstring_detail::copy_n(s1, n1, i1); + std::advance(s1, n1); + insert(i2, s1, s2); + } + assert(isSane()); + } + + template + void replaceImpl(iterator i1, iterator i2, + InputIterator b, InputIterator e, std::input_iterator_tag) { + basic_fbstring temp(begin(), i1); + temp.append(b, e).append(i2, end()); + swap(temp); + } + +public: + template + basic_fbstring& replace(iterator i1, iterator i2, + T1 first_or_n_or_s, T2 last_or_c_or_n) { + const bool + num1 = std::numeric_limits::is_specialized, + num2 = std::numeric_limits::is_specialized; + return replaceImplDiscr( + i1, i2, first_or_n_or_s, last_or_c_or_n, + Selector()); + } + + size_type copy(value_type* s, size_type n, size_type pos = 0) const { + enforce(pos <= size(), std::__throw_out_of_range, ""); + procrustes(n, size() - pos); + + fbstring_detail::pod_copy( + data() + pos, + data() + pos + n, + s); + return n; + } + + void swap(basic_fbstring& rhs) { + store_.swap(rhs.store_); + } + + // 21.3.6 string operations: + const value_type* c_str() const { + return store_.c_str(); + } 
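+
+  // Note that with the default core, c_str()/data() lazily write the
+  // terminator on every call (the storage is declared mutable for this
+  // purpose), so they stay cheap but are not bitwise-const.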
+ + const value_type* data() const { return c_str(); } + + allocator_type get_allocator() const { + return allocator_type(); + } + + size_type find(const basic_fbstring& str, size_type pos = 0) const { + return find(str.data(), pos, str.length()); + } + + size_type find(const value_type* needle, const size_type pos, + const size_type nsize) const { + if (!nsize) return pos; + auto const size = this->size(); + if (nsize + pos > size) return npos; + // Don't use std::search, use a Boyer-Moore-like trick by comparing + // the last characters first + auto const haystack = data(); + auto const nsize_1 = nsize - 1; + auto const lastNeedle = needle[nsize_1]; + + // Boyer-Moore skip value for the last char in the needle. Zero is + // not a valid value; skip will be computed the first time it's + // needed. + size_type skip = 0; + + const E * i = haystack + pos; + auto iEnd = haystack + size - nsize_1; + + while (i < iEnd) { + // Boyer-Moore: match the last element in the needle + while (i[nsize_1] != lastNeedle) { + if (++i == iEnd) { + // not found + return npos; + } + } + // Here we know that the last char matches + // Continue in pedestrian mode + for (size_t j = 0; ; ) { + assert(j < nsize); + if (i[j] != needle[j]) { + // Not found, we can skip + // Compute the skip value lazily + if (skip == 0) { + skip = 1; + while (skip <= nsize_1 && needle[nsize_1 - skip] != lastNeedle) { + ++skip; + } + } + i += skip; + break; + } + // Check if done searching + if (++j == nsize) { + // Yay + return i - haystack; + } + } + } + return npos; + } + + size_type find(const value_type* s, size_type pos = 0) const { + return find(s, pos, traits_type::length(s)); + } + + size_type find (value_type c, size_type pos = 0) const { + return find(&c, pos, 1); + } + + size_type rfind(const basic_fbstring& str, size_type pos = npos) const { + return rfind(str.data(), pos, str.length()); + } + + size_type rfind(const value_type* s, size_type pos, size_type n) const { + if (n > length()) return npos; + pos = std::min(pos, length() - n); + if (n == 0) return pos; + + const_iterator i(begin() + pos); + for (; ; --i) { + if (traits_type::eq(*i, *s) + && traits_type::compare(&*i, s, n) == 0) { + return i - begin(); + } + if (i == begin()) break; + } + return npos; + } + + size_type rfind(const value_type* s, size_type pos = npos) const { + return rfind(s, pos, traits_type::length(s)); + } + + size_type rfind(value_type c, size_type pos = npos) const { + return rfind(&c, pos, 1); + } + + size_type find_first_of(const basic_fbstring& str, size_type pos = 0) const { + return find_first_of(str.data(), pos, str.length()); + } + + size_type find_first_of(const value_type* s, + size_type pos, size_type n) const { + if (pos > length() || n == 0) return npos; + const_iterator i(begin() + pos), + finish(end()); + for (; i != finish; ++i) { + if (traits_type::find(s, n, *i) != 0) { + return i - begin(); + } + } + return npos; + } + + size_type find_first_of(const value_type* s, size_type pos = 0) const { + return find_first_of(s, pos, traits_type::length(s)); + } + + size_type find_first_of(value_type c, size_type pos = 0) const { + return find_first_of(&c, pos, 1); + } + + size_type find_last_of (const basic_fbstring& str, + size_type pos = npos) const { + return find_last_of(str.data(), pos, str.length()); + } + + size_type find_last_of (const value_type* s, size_type pos, + size_type n) const { + if (!empty() && n > 0) { + pos = std::min(pos, length() - 1); + const_iterator i(begin() + pos); + for (;; --i) { + if (traits_type::find(s, 
n, *i) != 0) { + return i - begin(); + } + if (i == begin()) break; + } + } + return npos; + } + + size_type find_last_of (const value_type* s, + size_type pos = npos) const { + return find_last_of(s, pos, traits_type::length(s)); + } + + size_type find_last_of (value_type c, size_type pos = npos) const { + return find_last_of(&c, pos, 1); + } + + size_type find_first_not_of(const basic_fbstring& str, + size_type pos = 0) const { + return find_first_not_of(str.data(), pos, str.size()); + } + + size_type find_first_not_of(const value_type* s, size_type pos, + size_type n) const { + if (pos < length()) { + const_iterator + i(begin() + pos), + finish(end()); + for (; i != finish; ++i) { + if (traits_type::find(s, n, *i) == 0) { + return i - begin(); + } + } + } + return npos; + } + + size_type find_first_not_of(const value_type* s, + size_type pos = 0) const { + return find_first_not_of(s, pos, traits_type::length(s)); + } + + size_type find_first_not_of(value_type c, size_type pos = 0) const { + return find_first_not_of(&c, pos, 1); + } + + size_type find_last_not_of(const basic_fbstring& str, + size_type pos = npos) const { + return find_last_not_of(str.data(), pos, str.length()); + } + + size_type find_last_not_of(const value_type* s, size_type pos, + size_type n) const { + if (!this->empty()) { + pos = std::min(pos, size() - 1); + const_iterator i(begin() + pos); + for (;; --i) { + if (traits_type::find(s, n, *i) == 0) { + return i - begin(); + } + if (i == begin()) break; + } + } + return npos; + } + + size_type find_last_not_of(const value_type* s, + size_type pos = npos) const { + return find_last_not_of(s, pos, traits_type::length(s)); + } + + size_type find_last_not_of (value_type c, size_type pos = npos) const { + return find_last_not_of(&c, pos, 1); + } + + basic_fbstring substr(size_type pos = 0, size_type n = npos) const { + enforce(pos <= size(), std::__throw_out_of_range, ""); + return basic_fbstring(data() + pos, std::min(n, size() - pos)); + } + + int compare(const basic_fbstring& str) const { + // FIX due to Goncalo N M de Carvalho July 18, 2005 + return compare(0, size(), str); + } + + int compare(size_type pos1, size_type n1, + const basic_fbstring& str) const { + return compare(pos1, n1, str.data(), str.size()); + } + + int compare(size_type pos1, size_type n1, + const value_type* s) const { + return compare(pos1, n1, s, traits_type::length(s)); + } + + int compare(size_type pos1, size_type n1, + const value_type* s, size_type n2) const { + enforce(pos1 <= size(), std::__throw_out_of_range, ""); + procrustes(n1, size() - pos1); + // The line below fixed by Jean-Francois Bastien, 04-23-2007. Thanks! + const int r = traits_type::compare(pos1 + data(), s, std::min(n1, n2)); + return r != 0 ? r : n1 > n2 ? 1 : n1 < n2 ? -1 : 0; + } + + int compare(size_type pos1, size_type n1, + const basic_fbstring& str, + size_type pos2, size_type n2) const { + enforce(pos2 <= str.size(), std::__throw_out_of_range, ""); + return compare(pos1, n1, str.data() + pos2, + std::min(n2, str.size() - pos2)); + } + + // Code from Jean-Francois Bastien (03/26/2007) + int compare(const value_type* s) const { + // Could forward to compare(0, size(), s, traits_type::length(s)) + // but that does two extra checks + const size_type n1(size()), n2(traits_type::length(s)); + const int r = traits_type::compare(data(), s, std::min(n1, n2)); + return r != 0 ? r : n1 > n2 ? 1 : n1 < n2 ? 
-1 : 0; + } + +private: + // Data + Storage store_; +}; + +// non-member functions +// C++11 21.4.8.1/2 +template +inline +basic_fbstring operator+(const basic_fbstring& lhs, + const basic_fbstring& rhs) { + + basic_fbstring result; + result.reserve(lhs.size() + rhs.size()); + result.append(lhs).append(rhs); + return std::move(result); +} + +// C++11 21.4.8.1/2 +template +inline +basic_fbstring operator+(basic_fbstring&& lhs, + const basic_fbstring& rhs) { + return std::move(lhs.append(rhs)); +} + +// C++11 21.4.8.1/3 +template +inline +basic_fbstring operator+(const basic_fbstring& lhs, + basic_fbstring&& rhs) { + if (rhs.capacity() >= lhs.size() + rhs.size()) { + // Good, at least we don't need to reallocate + return std::move(rhs.insert(0, lhs)); + } + // Meh, no go. Forward to operator+(const&, const&). + auto const& rhsC = rhs; + return lhs + rhsC; +} + +// C++11 21.4.8.1/4 +template +inline +basic_fbstring operator+(basic_fbstring&& lhs, + basic_fbstring&& rhs) { + return std::move(lhs.append(rhs)); +} + +template +inline +basic_fbstring operator+( + const typename basic_fbstring::value_type* lhs, + const basic_fbstring& rhs) { + // + basic_fbstring result; + const typename basic_fbstring::size_type len = + basic_fbstring::traits_type::length(lhs); + result.reserve(len + rhs.size()); + result.append(lhs, len).append(rhs); + return result; +} + +template +inline +basic_fbstring operator+( + typename basic_fbstring::value_type lhs, + const basic_fbstring& rhs) { + + basic_fbstring result; + result.reserve(1 + rhs.size()); + result.push_back(lhs); + result.append(rhs); + return result; +} + +template +inline +basic_fbstring operator+( + const basic_fbstring& lhs, + const typename basic_fbstring::value_type* rhs) { + + typedef typename basic_fbstring::size_type size_type; + typedef typename basic_fbstring::traits_type traits_type; + + basic_fbstring result; + const size_type len = traits_type::length(rhs); + result.reserve(lhs.size() + len); + result.append(lhs).append(rhs, len); + return result; +} + +template +inline +basic_fbstring operator+( + const basic_fbstring& lhs, + typename basic_fbstring::value_type rhs) { + + basic_fbstring result; + result.reserve(lhs.size() + 1); + result.append(lhs); + result.push_back(rhs); + return result; +} + +template +inline +bool operator==(const basic_fbstring& lhs, + const basic_fbstring& rhs) { + return lhs.compare(rhs) == 0; } + +template +inline +bool operator==(const typename basic_fbstring::value_type* lhs, + const basic_fbstring& rhs) { + return rhs == lhs; } + +template +inline +bool operator==(const basic_fbstring& lhs, + const typename basic_fbstring::value_type* rhs) { + return lhs.compare(rhs) == 0; } + +template +inline +bool operator!=(const basic_fbstring& lhs, + const basic_fbstring& rhs) { + return !(lhs == rhs); } + +template +inline +bool operator!=(const typename basic_fbstring::value_type* lhs, + const basic_fbstring& rhs) { + return !(lhs == rhs); } + +template +inline +bool operator!=(const basic_fbstring& lhs, + const typename basic_fbstring::value_type* rhs) { + return !(lhs == rhs); } + +template +inline +bool operator<(const basic_fbstring& lhs, + const basic_fbstring& rhs) { + return lhs.compare(rhs) < 0; } + +template +inline +bool operator<(const basic_fbstring& lhs, + const typename basic_fbstring::value_type* rhs) { + return lhs.compare(rhs) < 0; } + +template +inline +bool operator<(const typename basic_fbstring::value_type* lhs, + const basic_fbstring& rhs) { + return rhs.compare(lhs) > 0; } + +template +inline 
+bool operator>(const basic_fbstring& lhs, + const basic_fbstring& rhs) { + return rhs < lhs; } + +template +inline +bool operator>(const basic_fbstring& lhs, + const typename basic_fbstring::value_type* rhs) { + return rhs < lhs; } + +template +inline +bool operator>(const typename basic_fbstring::value_type* lhs, + const basic_fbstring& rhs) { + return rhs < lhs; } + +template +inline +bool operator<=(const basic_fbstring& lhs, + const basic_fbstring& rhs) { + return !(rhs < lhs); } + +template +inline +bool operator<=(const basic_fbstring& lhs, + const typename basic_fbstring::value_type* rhs) { + return !(rhs < lhs); } + +template +inline +bool operator<=(const typename basic_fbstring::value_type* lhs, + const basic_fbstring& rhs) { + return !(rhs < lhs); } + +template +inline +bool operator>=(const basic_fbstring& lhs, + const basic_fbstring& rhs) { + return !(lhs < rhs); } + +template +inline +bool operator>=(const basic_fbstring& lhs, + const typename basic_fbstring::value_type* rhs) { + return !(lhs < rhs); } + +template +inline +bool operator>=(const typename basic_fbstring::value_type* lhs, + const basic_fbstring& rhs) { + return !(lhs < rhs); +} + +// subclause 21.3.7.8: +template +void swap(basic_fbstring& lhs, basic_fbstring& rhs) { + lhs.swap(rhs); +} + +// TODO: make this faster. +template +inline +std::basic_istream< + typename basic_fbstring::value_type, + typename basic_fbstring::traits_type>& + operator>>( + std::basic_istream::value_type, + typename basic_fbstring::traits_type>& is, + basic_fbstring& str) { + typename std::basic_istream::sentry sentry(is); + typedef std::basic_istream::value_type, + typename basic_fbstring::traits_type> + __istream_type; + typedef typename __istream_type::ios_base __ios_base; + size_t extracted = 0; + auto err = __ios_base::goodbit; + if (sentry) { + auto n = is.width(); + if (n == 0) { + n = str.max_size(); + } + str.erase(); + auto got = is.rdbuf()->sgetc(); + for (; extracted != n && got != T::eof() && !isspace(got); ++extracted) { + // Whew. 
We get to store this guy
+        str.push_back(got);
+        got = is.rdbuf()->snextc();
+      }
+      if (got == T::eof()) {
+        err |= __ios_base::eofbit;
+        is.width(0);
+      }
+    }
+    if (!extracted) {
+      err |= __ios_base::failbit;
+    }
+    if (err) {
+      is.setstate(err);
+    }
+    return is;
+}
+
+template <typename E, class T, class A, class S>
+inline
+std::basic_ostream<typename basic_fbstring<E, T, A, S>::value_type,
+                   typename basic_fbstring<E, T, A, S>::traits_type>&
+operator<<(
+  std::basic_ostream<typename basic_fbstring<E, T, A, S>::value_type,
+                     typename basic_fbstring<E, T, A, S>::traits_type>& os,
+  const basic_fbstring<E, T, A, S>& str) {
+  os.write(str.data(), str.size());
+  return os;
+}
+
+#ifndef _LIBSTDCXX_FBSTRING
+
+template <typename E, class T, class A, class S>
+inline
+std::basic_istream<typename basic_fbstring<E, T, A, S>::value_type,
+                   typename basic_fbstring<E, T, A, S>::traits_type>&
+getline(
+  std::basic_istream<typename basic_fbstring<E, T, A, S>::value_type,
+                     typename basic_fbstring<E, T, A, S>::traits_type>& is,
+  basic_fbstring<E, T, A, S>& str,
+  typename basic_fbstring<E, T, A, S>::value_type delim) {
+  // Use the nonstandard getdelim()
+  char * buf = NULL;
+  size_t size = 0;
+  for (;;) {
+    // This looks quadratic but it really depends on realloc
+    auto const newSize = size + 128;
+    buf = static_cast<char*>(realloc(buf, newSize));
+    is.getline(buf + size, newSize - size, delim);
+    if (is.bad() || is.eof() || !is.fail()) {
+      // done by either failure, end of file, or normal read
+      size += std::strlen(buf + size);
+      break;
+    }
+    // Here we have failed due to too short a buffer
+    // Minus one to discount the terminating '\0'
+    size = newSize - 1;
+    assert(buf[size] == 0);
+    // Clear the error so we can continue reading
+    is.clear();
+  }
+  basic_fbstring<E, T, A, S> result(buf, size, size + 1,
+                                    AcquireMallocatedString());
+  result.swap(str);
+  return is;
+}
+
+template <typename E, class T, class A, class S>
+inline
+std::basic_istream<typename basic_fbstring<E, T, A, S>::value_type,
+                   typename basic_fbstring<E, T, A, S>::traits_type>&
+getline(
+  std::basic_istream<typename basic_fbstring<E, T, A, S>::value_type,
+                     typename basic_fbstring<E, T, A, S>::traits_type>& is,
+  basic_fbstring<E, T, A, S>& str) {
+  // Just forward to the version with a delimiter
+  return getline(is, str, '\n');
+}
+
+#endif
+
+template <typename E1, class T, class A, class S>
+const typename basic_fbstring<E1, T, A, S>::size_type
+basic_fbstring<E1, T, A, S>::npos =
+              static_cast<typename basic_fbstring<E1, T, A, S>::size_type>(-1);
+
+#ifndef _LIBSTDCXX_FBSTRING
+// basic_string compatibility routines
+
+template <typename E, class T, class A, class S>
+inline
+bool operator==(const basic_fbstring<E, T, A, S>& lhs,
+                const std::string& rhs) {
+  return lhs.compare(0, lhs.size(), rhs.data(), rhs.size()) == 0;
+}
+
+template <typename E, class T, class A, class S>
+inline
+bool operator==(const std::string& lhs,
+                const basic_fbstring<E, T, A, S>& rhs) {
+  return rhs == lhs;
+}
+
+template <typename E, class T, class A, class S>
+inline
+bool operator!=(const basic_fbstring<E, T, A, S>& lhs,
+                const std::string& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename E, class T, class A, class S>
+inline
+bool operator!=(const std::string& lhs,
+                const basic_fbstring<E, T, A, S>& rhs) {
+  return !(lhs == rhs);
+}
+
+#if !defined(_LIBSTDCXX_FBSTRING)
+typedef basic_fbstring<char> fbstring;
+#endif
+
+// fbstring is relocatable
+template <class T, class R, class A, class S>
+FOLLY_ASSUME_RELOCATABLE(basic_fbstring<T, R, A, S>);
+
+#else
+_GLIBCXX_END_NAMESPACE_VERSION
+#endif
+
+} // namespace folly
+
+#ifndef _LIBSTDCXX_FBSTRING
+
+namespace std {
+template <>
+struct hash< ::folly::fbstring> {
+  size_t operator()(const ::folly::fbstring& s) const {
+    return ::folly::hash::fnv32(s.c_str());
+  }
+};
+}
+
+#endif // _LIBSTDCXX_FBSTRING
+
+#endif // FOLLY_BASE_FBSTRING_H_
diff --git a/folly/FBVector.h b/folly/FBVector.h
new file mode 100644
index 00000000..b9cecd93
--- /dev/null
+++ b/folly/FBVector.h
@@ -0,0 +1,936 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Alexandrescu (aalexandre) + +/** + * Vector type. Drop-in replacement for std::vector featuring + * significantly faster primitives, see e.g. benchmark results at + * https:*phabricator.fb.com/D235852. + * + * In order for a type to be used with fbvector, it must be + * relocatable, see Traits.h. + * + * For user-defined types you must specialize templates + * appropriately. Consult Traits.h for ways to do so and for a handy + * family of macros FOLLY_ASSUME_FBVECTOR_COMPATIBLE*. + * + * For more information and documentation see folly/docs/FBVector.md + */ + +#ifndef FOLLY_FBVECTOR_H_ +#define FOLLY_FBVECTOR_H_ + +#include "folly/Foreach.h" +#include "folly/Malloc.h" +#include "folly/Traits.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace folly { +/** + * Forward declaration for use by FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2, + * see folly/Traits.h. + */ +template > +class fbvector; +} + +// You can define an fbvector of fbvectors. +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2(folly::fbvector); + +namespace folly { +namespace fbvector_detail { + +/** + * isForwardIterator::value yields true if T is a forward iterator + * or better, and false otherwise. + */ +template struct isForwardIterator { + enum { value = boost::is_convertible< + typename std::iterator_traits::iterator_category, + std::forward_iterator_tag>::value + }; +}; + +/** + * Destroys all elements in the range [b, e). If the type referred to + * by the iterators has a trivial destructor, does nothing. + */ +template +void destroyRange(It b, It e) { + typedef typename boost::remove_reference::type T; + if (boost::has_trivial_destructor::value) return; + for (; b != e; ++b) { + (*b).~T(); + } +} + +/** + * Moves the "interesting" part of value to the uninitialized memory + * at address addr, and leaves value in a destroyable state. + */ + +template +typename boost::enable_if_c< + boost::has_trivial_assign::value +>::type +uninitialized_destructive_move(T& value, T* addr) { + // Just assign the thing; this is most efficient + *addr = value; +} + +template +typename boost::enable_if_c< + !boost::has_trivial_assign::value && + boost::has_nothrow_constructor::value +>::type +uninitialized_destructive_move(T& value, T* addr) { + // Cheap default constructor - move and reinitialize + memcpy(addr, &value, sizeof(T)); + new(&value) T; +} + +template +typename std::enable_if< + !boost::has_trivial_assign::value && + !boost::has_nothrow_constructor::value +>::type +uninitialized_destructive_move(T& value, T* addr) { + // User defined move construction. + + // TODO: we should probably prefer this over the above memcpy() + // version when the type has a user-defined move constructor. We + // don't right now because 4.6 doesn't implement + // std::is_move_constructible<> yet. + new (addr) T(std::move(value)); +} + +/** + * Fills n objects of type T starting at address b with T's default + * value. If the operation throws, destroys all objects constructed so + * far and calls free(b). 
+ */ +template +void uninitializedFillDefaultOrFree(T * b, size_t n) { + if (boost::is_arithmetic::value || boost::is_pointer::value) { + if (n <= 16384 / sizeof(T)) { + memset(b, 0, n * sizeof(T)); + } else { + goto duff_fill; + } + } else if (boost::has_nothrow_constructor::value) { + duff_fill: + auto i = b; + auto const e1 = b + (n & ~size_t(7)); + for (; i != e1; i += 8) { + new(i) T(); + new(i + 1) T(); + new(i + 2) T(); + new(i + 3) T(); + new(i + 4) T(); + new(i + 5) T(); + new(i + 6) T(); + new(i + 7) T(); + } + for (auto const e = b + n; i != e; ++i) { + new(i) T(); + } + } else { + // Conservative approach + auto i = b; + try { + for (auto const e = b + n; i != e; ++i) { + new(i) T; + } + } catch (...) { + destroyRange(b, i); + free(b); + throw; + } + } +} + +/** + * Fills n objects of type T starting at address b with value. If the + * operation throws, destroys all objects constructed so far and calls + * free(b). + */ +template +void uninitializedFillOrFree(T * b, size_t n, const T& value) { + auto const e = b + n; + if (boost::has_trivial_copy::value) { + auto i = b; + auto const e1 = b + (n & ~size_t(7)); + for (; i != e1; i += 8) { + new(i) T(value); + new(i + 1) T(value); + new(i + 2) T(value); + new(i + 3) T(value); + new(i + 4) T(value); + new(i + 5) T(value); + new(i + 6) T(value); + new(i + 7) T(value); + } + for (; i != e; ++i) { + new(i) T(value); + } + } else { + // Conservative approach + auto i = b; + try { + for (; i != e; ++i) { + new(i) T(value); + } + } catch (...) { + destroyRange(b, i); + free(b); + throw; + } + } +} +} // namespace fbvector_detail + +/** + * This is the std::vector replacement. For conformity, fbvector takes + * the same template parameters, but it doesn't use the + * allocator. Instead, it uses malloc, and when present, jemalloc's + * extensions. 
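+ *
+ * A brief usage sketch (values are illustrative):
+ *
+ *   folly::fbvector<int> v;
+ *   v.reserve(100);          // allocation sized through goodMallocSize()
+ *   for (int i = 0; i < 100; ++i) v.push_back(i);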
+ */ +template +class fbvector : private boost::totally_ordered > { + bool isSane() const { + return + begin() <= end() && + empty() == (size() == 0) && + empty() == (begin() == end()) && + size() <= max_size() && + capacity() <= max_size() && + size() <= capacity() && + + // Either we have no capacity or our pointers should make sense: + ((!b_ && !e_ && !z_) || (b_ != z_ && e_ <= z_)); + } + + struct Invariant { +#ifndef NDEBUG + explicit Invariant(const fbvector& s) : s_(s) { + assert(s_.isSane()); + } + ~Invariant() { + assert(s_.isSane()); + } + private: + const fbvector& s_; +#else + explicit Invariant(const fbvector&) {} +#endif + Invariant& operator=(const Invariant&); + }; + +public: + +// types: + typedef T value_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef T* iterator; + typedef const T* const_iterator; + typedef size_t size_type; + typedef ssize_t difference_type; + // typedef typename allocator_traits::pointer pointer; + // typedef typename allocator_traits::const_pointer const_pointer; + typedef Allocator allocator_type; + typedef typename Allocator::pointer pointer; + typedef typename Allocator::const_pointer const_pointer; + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; + +// 23.3.6.1 construct/copy/destroy: + fbvector() : b_(NULL), e_(NULL), z_(NULL) {} + + explicit fbvector(const Allocator&) { + new(this) fbvector; + } + + explicit fbvector(const size_type n) { + if (n == 0) { + b_ = e_ = z_ = 0; + return; + } + + auto const nBytes = goodMallocSize(n * sizeof(T)); + b_ = static_cast(malloc(nBytes)); + fbvector_detail::uninitializedFillDefaultOrFree(b_, n); + e_ = b_ + n; + z_ = b_ + nBytes / sizeof(T); + } + + fbvector(const size_type n, const T& value) { + if (!n) { + b_ = e_ = z_ = 0; + return; + } + + auto const nBytes = goodMallocSize(n * sizeof(T)); + b_ = static_cast(malloc(nBytes)); + fbvector_detail::uninitializedFillOrFree(b_, n, value); + e_ = b_ + n; + z_ = b_ + nBytes / sizeof(T); + } + + fbvector(const size_type n, const T& value, const Allocator&) { + new(this) fbvector(n, value); + } + + template + fbvector(InputIteratorOrNum first, InputIteratorOrNum last) { + new(this) fbvector; + assign(first, last); + } + + template + fbvector(InputIterator first, InputIterator last, + const Allocator&) { + new(this) fbvector(first, last); + } + + fbvector(const fbvector& rhs) { + new(this) fbvector(rhs.begin(), rhs.end()); + } + fbvector(const fbvector& rhs, const Allocator&) { + new(this) fbvector(rhs); + } + + fbvector(fbvector&& o, const Allocator& = Allocator()) + : b_(o.b_) + , e_(o.e_) + , z_(o.z_) + { + o.b_ = o.e_ = o.z_ = 0; + } + + fbvector(std::initializer_list il, const Allocator& = Allocator()) { + new(this) fbvector(il.begin(), il.end()); + } + + ~fbvector() { + // fbvector only works with relocatable objects. We insert this + // static check inside the destructor because pretty much any + // instantiation of fbvector will generate the destructor (and + // therefore refuse compilation if the assertion fails). To see + // how you can enable IsRelocatable for your type, refer to the + // definition of IsRelocatable in Traits.h. 
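+    // For example (a sketch; MyType stands for any user-defined
+    // relocatable type):
+    //   FOLLY_ASSUME_FBVECTOR_COMPATIBLE(MyType);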
+ BOOST_STATIC_ASSERT(IsRelocatable::value); + if (!b_) return; + fbvector_detail::destroyRange(b_, e_); + free(b_); + } + fbvector& operator=(const fbvector& rhs) { + assign(rhs.begin(), rhs.end()); + return *this; + } + + fbvector& operator=(fbvector&& v) { + clear(); + swap(v); + return *this; + } + + fbvector& operator=(std::initializer_list il) { + assign(il.begin(), il.end()); + return *this; + } + + bool operator==(const fbvector& rhs) const { + return size() == rhs.size() && std::equal(begin(), end(), rhs.begin()); + } + + bool operator<(const fbvector& rhs) const { + return std::lexicographical_compare(begin(), end(), + rhs.begin(), rhs.end()); + } + +private: + template + void assignImpl(InputIterator first, InputIterator last, boost::false_type) { + // Pair of iterators + if (fbvector_detail::isForwardIterator::value) { + if (b_ <= &*first && &*first < e_) { + // Aliased assign, work on the side + fbvector result(first, last); + result.swap(*this); + return; + } + + auto const oldSize = size(); + auto const newSize = std::distance(first, last); + + if (static_cast(oldSize) >= newSize) { + // No reallocation, nice + auto const newEnd = std::copy(first, last, b_); + fbvector_detail::destroyRange(newEnd, e_); + e_ = newEnd; + return; + } + + // Must reallocate - just do it on the side + auto const nBytes = goodMallocSize(newSize * sizeof(T)); + auto const b = static_cast(malloc(nBytes)); + std::uninitialized_copy(first, last, b); + this->fbvector::~fbvector(); + b_ = b; + e_ = b + newSize; + z_ = b_ + nBytes / sizeof(T); + } else { + // Input iterator sucks + FOR_EACH (i, *this) { + if (first == last) { + fbvector_detail::destroyRange(i, e_); + e_ = i; + return; + } + *i = *first; + ++first; + } + FOR_EACH_RANGE (i, first, last) { + push_back(*i); + } + } + } + + void assignImpl(const size_type newSize, const T value, boost::true_type) { + // Arithmetic type, forward back to unambiguous definition + assign(newSize, value); + } + +public: + // Classic ambiguity (and a lot of unnecessary complexity) in + // std::vector: assign(10, 20) for vector means "assign 10 + // elements all having the value 20" but is intercepted by the + // two-iterators overload assign(first, last). So we need to + // disambiguate here. There is no pretty solution. We use here + // overloading based on is_arithmetic. Method insert has the same + // issue (and the same solution in this implementation). + template + void assign(InputIteratorOrNum first, InputIteratorOrNum last) { + assignImpl(first, last, boost::is_arithmetic()); + } + + void assign(const size_type newSize, const T& value) { + if (b_ <= &value && &value < e_) { + // Need to check for aliased assign, sigh + return assign(newSize, T(value)); + } + + auto const oldSize = size(); + if (oldSize >= newSize) { + // No reallocation, nice + auto const newEnd = b_ + newSize; + fbvector_detail::destroyRange(newEnd, e_); + e_ = newEnd; + return; + } + + // Need to reallocate + if (reserve_in_place(newSize)) { + // Careful here, fill and uninitialized_fill may throw. The + // latter is transactional, so no need to worry about a + // buffer partially filled in case of exception. + std::fill(b_, e_, value); + auto const newEnd = b_ + newSize; + std::uninitialized_fill(e_, newEnd, value); + e_ = newEnd; + return; + } + + // Cannot expand or jemalloc not present at all; must just + // allocate a new chunk and discard the old one. This is + // tantamount with creating a new fbvector altogether. 
This won't + // recurse infinitely; the constructor implements its own. + fbvector temp(newSize, value); + temp.swap(*this); + } + + void assign(std::initializer_list il) { + assign(il.begin(), il.end()); + } + + allocator_type get_allocator() const { + // whatevs + return allocator_type(); + } + +// iterators: + iterator begin() { + return b_; + } + const_iterator begin() const { + return b_; + } + iterator end() { + return e_; + } + const_iterator end() const { + return e_; + } + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + const_iterator cbegin() const { + return b_; + } + const_iterator cend() const { + return e_; + } + +// 23.3.6.2 capacity: + size_type size() const { + return e_ - b_; + } + + size_type max_size() { + // good luck gettin' there + return ~size_type(0); + } + + void resize(const size_type sz) { + auto const oldSize = size(); + if (sz <= oldSize) { + auto const newEnd = b_ + sz; + fbvector_detail::destroyRange(newEnd, e_); + e_ = newEnd; + } else { + // Must expand + reserve(sz); + auto newEnd = b_ + sz; + std::uninitialized_fill(e_, newEnd, T()); + e_ = newEnd; + } + } + + void resize(const size_type sz, const T& c) { + auto const oldSize = size(); + if (sz <= oldSize) { + auto const newEnd = b_ + sz; + fbvector_detail::destroyRange(newEnd, e_); + e_ = newEnd; + } else { + // Must expand + reserve(sz); + auto newEnd = b_ + sz; + std::uninitialized_fill(e_, newEnd, c); + e_ = newEnd; + } + } + + size_type capacity() const { + return z_ - b_; + } + bool empty() const { + return b_ == e_; + } + +private: + bool reserve_in_place(const size_type n) { + auto const crtCapacity = capacity(); + if (n <= crtCapacity) return true; + if (!rallocm) return false; + + // using jemalloc's API. Don't forget that jemalloc can never grow + // in place blocks smaller than 4096 bytes. + auto const crtCapacityBytes = crtCapacity * sizeof(T); + if (crtCapacityBytes < jemallocMinInPlaceExpandable) return false; + + auto const newCapacityBytes = goodMallocSize(n * sizeof(T)); + void* p = b_; + if (rallocm(&p, NULL, newCapacityBytes, 0, ALLOCM_NO_MOVE) + != ALLOCM_SUCCESS) { + return false; + } + + // Managed to expand in place, reflect that in z_ + assert(b_ == p); + z_ = b_ + newCapacityBytes / sizeof(T); + return true; + } + + void reserve_with_move(const size_type n) { + // Here we can be sure we'll need to do a full reallocation + auto const crtCapacity = capacity(); + assert(crtCapacity < n); // reserve_in_place should have taken + // care of this + auto const newCapacityBytes = goodMallocSize(n * sizeof(T)); + auto b = static_cast(malloc(newCapacityBytes)); + auto const oldSize = size(); + memcpy(b, b_, oldSize * sizeof(T)); + // Done with the old chunk. Free but don't call destructors! + free(b_); + b_ = b; + e_ = b_ + oldSize; + z_ = b_ + newCapacityBytes / sizeof(T); + // done with the old chunk + } + +public: + void reserve(const size_type n) { + if (reserve_in_place(n)) return; + reserve_with_move(n); + } + + void shrink_to_fit() { + if (!rallocm) return; + + // using jemalloc's API. Don't forget that jemalloc can never + // shrink in place blocks smaller than 4096 bytes. 
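+    // What follows, in brief: rallocm asks jemalloc to resize the
+    // block in place; ALLOCM_NO_MOVE makes the call fail rather than
+    // relocate, so z_ is only updated on ALLOCM_SUCCESS.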
+ void* p = b_; + auto const crtCapacityBytes = capacity() * sizeof(T); + auto const newCapacityBytes = goodMallocSize(size() * sizeof(T)); + if (crtCapacityBytes >= jemallocMinInPlaceExpandable && + rallocm(&p, NULL, newCapacityBytes, 0, ALLOCM_NO_MOVE) + == ALLOCM_SUCCESS) { + // Celebrate + z_ = b_ + newCapacityBytes / sizeof(T); + } + } + +// element access + reference operator[](size_type n) { + assert(n < size()); + return b_[n]; + } + const_reference operator[](size_type n) const { + assert(n < size()); + return b_[n]; + } + const_reference at(size_type n) const { + if (n > size()) { + throw std::out_of_range("fbvector: index is greater than size."); + } + return (*this)[n]; + } + reference at(size_type n) { + auto const& cThis = *this; + return const_cast(cThis.at(n)); + } + reference front() { + assert(!empty()); + return *b_; + } + const_reference front() const { + assert(!empty()); + return *b_; + } + reference back() { + assert(!empty()); + return e_[-1]; + } + const_reference back() const { + assert(!empty()); + return e_[-1]; + } + +// 23.3.6.3 data access + T* data() { + return b_; + } + const T* data() const { + return b_; + } + +private: + size_t computePushBackCapacity() const { + return empty() ? std::max(64 / sizeof(T), size_t(1)) + : capacity() < jemallocMinInPlaceExpandable ? capacity() * 2 + : (capacity() * 3) / 2; + } + +public: +// 23.3.6.4 modifiers: + template + void emplace_back(Args&&... args) { + if (e_ == z_) { + if (!reserve_in_place(size() + 1)) { + reserve_with_move(computePushBackCapacity()); + } + } + new (e_) T(std::forward(args)...); + ++e_; + } + + void push_back(T x) { + if (e_ == z_) { + if (!reserve_in_place(size() + 1)) { + reserve_with_move(computePushBackCapacity()); + } + } + fbvector_detail::uninitialized_destructive_move(x, e_); + ++e_; + } + +private: + bool expand() { + if (!rallocm) return false; + auto const capBytes = capacity() * sizeof(T); + if (capBytes < jemallocMinInPlaceExpandable) return false; + auto const newCapBytes = goodMallocSize(capBytes + sizeof(T)); + void * bv = b_; + if (rallocm(&bv, NULL, newCapBytes, 0, ALLOCM_NO_MOVE) != ALLOCM_SUCCESS) { + return false; + } + // Managed to expand in place + assert(bv == b_); // nothing moved + z_ = b_ + newCapBytes / sizeof(T); + assert(capacity() > capBytes / sizeof(T)); + return true; + } + +public: + void pop_back() { + assert(!empty()); + --e_; + if (!boost::has_trivial_destructor::value) { + e_->T::~T(); + } + } + // template + // iterator emplace(const_iterator position, Args&&... 
args); + + iterator insert(const_iterator position, T x) { + size_t newSize; // intentionally uninitialized + if (e_ == z_ && !reserve_in_place(newSize = size() + 1)) { + // Can't reserve in place, make a copy + auto const offset = position - cbegin(); + fbvector tmp; + tmp.reserve(newSize); + memcpy(tmp.b_, b_, offset * sizeof(T)); + fbvector_detail::uninitialized_destructive_move( + x, + tmp.b_ + offset); + memcpy(tmp.b_ + offset + 1, b_ + offset, (size() - offset) * sizeof(T)); + // Brutally reassign this to refer to tmp's guts + free(b_); + b_ = tmp.b_; + e_ = b_ + newSize; + z_ = tmp.z_; + // get rid of tmp's guts + new(&tmp) fbvector; + return begin() + offset; + } + // Here we have enough room + memmove(const_cast(&*position) + 1, + const_cast(&*position), + sizeof(T) * (e_ - position)); + fbvector_detail::uninitialized_destructive_move( + x, + const_cast(&*position)); + ++e_; + return const_cast(position); + } + + iterator insert(const_iterator position, const size_type n, const T& x) { + if (e_ + n >= z_) { + if (b_ <= &x && &x < e_) { + // Ew, aliased insert + auto copy = x; + return insert(position, n, copy); + } + auto const m = position - b_; + reserve(size() + n); + position = b_ + m; + } + memmove(const_cast(position) + n, + position, + sizeof(T) * (e_ - position)); + if (boost::has_trivial_copy::value) { + std::uninitialized_fill(const_cast(position), + const_cast(position) + n, + x); + } else { + try { + std::uninitialized_fill(const_cast(position), + const_cast(position) + n, + x); + } catch (...) { + // Oops, put things back where they were + memmove(const_cast(position), + position + n, + sizeof(T) * (e_ - position)); + throw; + } + } + e_ += n; + return const_cast(position); + } + +private: + template + iterator insertImpl(const_iterator position, + InputIterator first, InputIterator last, + boost::false_type) { + // Pair of iterators + if (fbvector_detail::isForwardIterator::value) { + // Can compute distance + auto const n = std::distance(first, last); + if (e_ + n >= z_) { + if (b_ <= &*first && &*first < e_) { + // Ew, aliased insert + goto conservative; + } + auto const m = position - b_; + reserve(size() + n); + position = b_ + m; + } + memmove(const_cast(position) + n, + position, + sizeof(T) * (e_ - position)); + try { + std::uninitialized_copy(first, last, + const_cast(position)); + } catch (...) 
{ + // Oops, put things back where they were + memmove(const_cast(position), + position + n, + sizeof(T) * (e_ - position)); + throw; + } + e_ += n; + return const_cast(position); + } else { + // Cannot compute distance, crappy approach + // TODO: OPTIMIZE + conservative: + fbvector result(cbegin(), position); + auto const offset = result.size(); + FOR_EACH_RANGE (i, first, last) { + result.push_back(*i); + } + result.insert(result.end(), position, cend()); + result.swap(*this); + return begin() + offset; + } + } + + iterator insertImpl(const_iterator position, + const size_type count, const T value, boost::true_type) { + // Forward back to unambiguous function + return insert(position, count, value); + } + +public: + template + iterator insert(const_iterator position, InputIteratorOrNum first, + InputIteratorOrNum last) { + return insertImpl(position, first, last, + boost::is_arithmetic()); + } + + iterator insert(const_iterator position, std::initializer_list il) { + return insert(position, il.begin(), il.end()); + } + + iterator erase(const_iterator position) { + if (position == e_) return e_; + auto p = const_cast(position); + (*p).T::~T(); + memmove(p, p + 1, sizeof(T) * (e_ - p - 1)); + --e_; + return p; + } + + iterator erase(const_iterator first, const_iterator last) { + assert(first <= last); + auto p1 = const_cast(first); + auto p2 = const_cast(last); + fbvector_detail::destroyRange(p1, p2); + memmove(p1, last, sizeof(T) * (e_ - last)); + e_ -= last - first; + return p1; + } + + void swap(fbvector& rhs) { + std::swap(b_, rhs.b_); + std::swap(e_, rhs.e_); + std::swap(z_, rhs.z_); + } + + void clear() { + fbvector_detail::destroyRange(b_, e_); + e_ = b_; + } + +private: + // Data + T *b_, *e_, *z_; +}; + +template +bool operator!=(const fbvector& lhs, + const fbvector& rhs) { + return !(lhs == rhs); +} + +template +void swap(fbvector& lhs, fbvector& rhs) { + lhs.swap(rhs); +} + +/** + * Resizes *v to exactly n elements. May reallocate the vector to a + * smaller buffer if too much space will be left unused. + */ +template +static void compactResize(folly::fbvector * v, size_t size) { + auto const oldCap = v->capacity(); + if (oldCap > size + 1024 && size < oldCap * 0.3) { + // Too much slack memory, reallocate a smaller buffer + auto const oldSize = v->size(); + if (size <= oldSize) { + // Shrink + folly::fbvector(v->begin(), v->begin() + size).swap(*v); + } else { + // Expand + folly::fbvector temp; + temp.reserve(size); + copy(v->begin(), v->end(), back_inserter(temp)); + temp.resize(size); + temp.swap(*v); + } + } else { + // Nolo contendere + v->resize(size); + } +} + +} // namespace folly + +#endif // FOLLY_FBVECTOR_H_ diff --git a/folly/Foreach.h b/folly/Foreach.h new file mode 100644 index 00000000..6b626b37 --- /dev/null +++ b/folly/Foreach.h @@ -0,0 +1,260 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef FOLLY_BASE_FOREACH_H_
+#define FOLLY_BASE_FOREACH_H_
+
+/*
+ * Interim macros (until we have C++0x range-based for) that simplify
+ * writing loops of the form
+ *
+ * for (Container::iterator i = c.begin(); i != c.end(); ++i) statement
+ *
+ * Just replace the above with:
+ *
+ * FOR_EACH (i, c) statement
+ *
+ * and everything is taken care of.
+ *
+ * The implementation is a bit convoluted to make sure the container is
+ * only evaluated once (however, keep in mind that c.end() is evaluated
+ * at every pass through the loop). To ensure the container is not
+ * evaluated multiple times, the macro defines one do-nothing if
+ * statement to inject the Boolean variable FOR_EACH_state1, and then a
+ * for statement that is executed only once, which defines the variable
+ * FOR_EACH_state2 holding a reference to the container being
+ * iterated. The workhorse is the last loop, which uses the just defined
+ * reference FOR_EACH_state2.
+ *
+ * The state variables are nested so they don't interfere; you can use
+ * FOR_EACH multiple times in the same scope, either at the same level or
+ * nested.
+ *
+ * In optimized builds g++ eliminates the extra gymnastics entirely and
+ * generates code 100% identical to the handwritten loop.
+ *
+ * This will not work with temporary containers. Consider BOOST_FOREACH
+ * if you need that.
+ */
+
+#include <boost/type_traits.hpp>
+
+namespace folly { namespace detail {
+
+/*
+ * Simple template for obtaining the unqualified type given a generic
+ * type T. For example, if T is const int,
+ * typeof(remove_cv_from_expression(T())) yields int. Due to a bug in
+ * g++, you need to actually use
+ * typeof(remove_cv_from_expression(T())) instead of typename
+ * boost::remove_cv<T>::type. Note that the function
+ * remove_cv_from_expression is never defined - use it only inside
+ * typeof.
+ */
+template <class T> typename boost::remove_cv<T>::type
+remove_cv_from_expression(T value);
+
+}}
+
+/*
+ * Use a "reference reference" (auto&&) to take advantage of reference
+ * collapsing rules, if available. In this case, FOR_EACH* will work with
+ * temporary containers.
+ */
+#define FB_AUTO_RR(x, y) auto&& x = y
+
+/*
+ * The first AUTO should be replaced by decltype((c)) &
+ * FOR_EACH_state2, but bugs in gcc prevent that from functioning
+ * properly. The second pair of parens in decltype is actually
+ * required, see
+ * cpp-next.com/archive/2011/04/appearing-and-disappearing-consts-in-c/
+ */
+#define FOR_EACH(i, c)                                  \
+  if (bool FOR_EACH_state1 = false) {} else             \
+    for (auto & FOR_EACH_state2 = (c);                  \
+         !FOR_EACH_state1; FOR_EACH_state1 = true)      \
+      for (auto i = FOR_EACH_state2.begin();            \
+           i != FOR_EACH_state2.end(); ++i)
+
+/*
+ * Similar to FOR_EACH, but iterates the container backwards by
+ * using rbegin() and rend().
+ */
+#define FOR_EACH_R(i, c)                                    \
+  if (bool FOR_EACH_R_state1 = false) {} else               \
+    for (auto & FOR_EACH_R_state2 = (c);                    \
+         !FOR_EACH_R_state1; FOR_EACH_R_state1 = true)      \
+      for (auto i = FOR_EACH_R_state2.rbegin();             \
+           i != FOR_EACH_R_state2.rend(); ++i)
+
+/*
+ * Similar to FOR_EACH but also allows client to specify a 'count' variable
+ * to track the current iteration in the loop (starting at zero).
+ * Similar to python's enumerate() function. For example:
+ * string commaSeparatedValues = "VALUES: ";
+ * FOR_EACH_ENUMERATE(ii, value, columns) { // don't want comma at the end!
+ *   commaSeparatedValues += (ii == 0) ?
*value : string(",") + *value; + * } + */ +#define FOR_EACH_ENUMERATE(count, i, c) \ + if (bool FOR_EACH_state1 = false) {} else \ + for (auto & FOR_EACH_state2 = (c); \ + !FOR_EACH_state1; FOR_EACH_state1 = true) \ + if (size_t FOR_EACH_privateCount = 0) {} else \ + if (const size_t& count = FOR_EACH_privateCount) {} else \ + for (auto i = FOR_EACH_state2.begin(); \ + i != FOR_EACH_state2.end(); ++FOR_EACH_privateCount, ++i) + +/** + * Similar to FOR_EACH, but gives the user the key and value for each entry in + * the container, instead of just the iterator to the entry. For example: + * map testMap; + * FOR_EACH_KV(key, value, testMap) { + * cout << key << " " << value; + * } + */ +#define FOR_EACH_KV(k, v, c) \ + if (unsigned int FOR_EACH_state1 = 0) {} else \ + for (FB_AUTO_RR(FOR_EACH_state2, (c)); \ + !FOR_EACH_state1; FOR_EACH_state1 = 1) \ + for (auto FOR_EACH_state3 = FOR_EACH_state2.begin(); \ + FOR_EACH_state3 != FOR_EACH_state2.end(); \ + FOR_EACH_state1 == 2 \ + ? ((FOR_EACH_state1 = 0), ++FOR_EACH_state3) \ + : (FOR_EACH_state3 = FOR_EACH_state2.end())) \ + for (auto &k = FOR_EACH_state3->first; \ + !FOR_EACH_state1; ++FOR_EACH_state1) \ + for (auto &v = FOR_EACH_state3->second; \ + !FOR_EACH_state1; ++FOR_EACH_state1) + +namespace folly { namespace detail { + +// Boost 1.48 lacks has_less, we emulate a subset of it here. +template +class HasLess { + struct BiggerThanChar { char unused[2]; }; + template static char test(decltype(C() < D())*); + template static BiggerThanChar test(...); +public: + enum { value = sizeof(test(0)) == 1 }; +}; + +/** + * notThereYet helps the FOR_EACH_RANGE macro by opportunistically + * using "<" instead of "!=" whenever available when checking for loop + * termination. This makes e.g. examples such as FOR_EACH_RANGE (i, + * 10, 5) execute zero iterations instead of looping virtually + * forever. At the same time, some iterator types define "!=" but not + * "<". The notThereYet function will dispatch differently for those. + * + * Below is the correct implementation of notThereYet. It is disabled + * because of a bug in Boost 1.46: The filesystem::path::iterator + * defines operator< (via boost::iterator_facade), but that in turn + * uses distance_to which is undefined for that particular + * iterator. So HasLess (defined above) identifies + * boost::filesystem::path as properly comparable with <, but in fact + * attempting to do so will yield a compile-time error. + * + * The else branch (active) contains a conservative + * implementation. + */ + +#if 0 + +template +typename std::enable_if::value, bool>::type +notThereYet(T& iter, const U& end) { + return iter < end; +} + +template +typename std::enable_if::value, bool>::type +notThereYet(T& iter, const U& end) { + return iter != end; +} + +#else + +template +typename std::enable_if< + (std::is_arithmetic::value && std::is_arithmetic::value) || + (std::is_pointer::value && std::is_pointer::value), + bool>::type +notThereYet(T& iter, const U& end) { + return iter < end; +} + +template +typename std::enable_if< + !( + (std::is_arithmetic::value && std::is_arithmetic::value) || + (std::is_pointer::value && std::is_pointer::value) + ), + bool>::type +notThereYet(T& iter, const U& end) { + return iter != end; +} + +#endif + + +/** + * downTo is similar to notThereYet, but in reverse - it helps the + * FOR_EACH_RANGE_R macro. 
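+ *
+ * For example, FOR_EACH_RANGE_R (i, 0, 5) visits i = 4, 3, 2, 1, 0:
+ * the arithmetic overload post-decrements while begin < iter, and the
+ * general overload stops (without decrementing) once iter == begin.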
+ */ +template +typename std::enable_if::value, bool>::type +downTo(T& iter, const U& begin) { + return begin < iter--; +} + +template +typename std::enable_if::value, bool>::type +downTo(T& iter, const U& begin) { + if (iter == begin) return false; + --iter; + return true; +} + +} } + +/* + * Iteration with given limits. end is assumed to be reachable from + * begin. end is evaluated every pass through the loop. + * + * NOTE: The type of the loop variable should be the common type of "begin" + * and "end". e.g. If "begin" is "int" but "end" is "long", we want "i" + * to be "long". This is done by getting the type of (true ? begin : end) + */ +#define FOR_EACH_RANGE(i, begin, end) \ + for (auto i = (true ? (begin) : (end)); \ + ::folly::detail::notThereYet(i, (end)); \ + ++i) + +/* + * Iteration with given limits. begin is assumed to be reachable from + * end by successive decrements. begin is evaluated every pass through + * the loop. + * + * NOTE: The type of the loop variable should be the common type of "begin" + * and "end". e.g. If "begin" is "int" but "end" is "long", we want "i" + * to be "long". This is done by getting the type of (false ? begin : end) + */ +#define FOR_EACH_RANGE_R(i, begin, end) \ + for (auto i = (false ? (begin) : (end)); ::folly::detail::downTo(i, (begin));) + +#endif diff --git a/folly/Format-inl.h b/folly/Format-inl.h new file mode 100644 index 00000000..59f4550a --- /dev/null +++ b/folly/Format-inl.h @@ -0,0 +1,1078 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_FORMAT_H_ +#error This file may only be included from Format.h. +#endif + +namespace folly { + +namespace detail { + +extern const char formatHexUpper[256][2]; +extern const char formatHexLower[256][2]; +extern const char formatOctal[512][3]; +extern const char formatBinary[256][8]; + +const size_t kMaxHexLength = 2 * sizeof(uintmax_t); +const size_t kMaxOctalLength = 3 * sizeof(uintmax_t); +const size_t kMaxBinaryLength = 8 * sizeof(uintmax_t); + +/** + * Convert an unsigned to hex, using repr (which maps from each possible + * 2-hex-bytes value to the 2-character representation). + * + * Just like folly::detail::uintToBuffer in Conv.h, writes at the *end* of + * the supplied buffer and returns the offset of the beginning of the string + * from the start of the buffer. The formatted string will be in range + * [buf+begin, buf+bufLen). + */ +template +size_t uintToHex(char* buffer, size_t bufLen, Uint v, + const char (&repr)[256][2]) { + for (; v >= 256; v >>= 8) { + auto b = v & 0xff; + bufLen -= 2; + buffer[bufLen] = repr[b][0]; + buffer[bufLen + 1] = repr[b][1]; + } + buffer[--bufLen] = repr[v][1]; + if (v >= 16) { + buffer[--bufLen] = repr[v][0]; + } + return bufLen; +} + +/** + * Convert an unsigned to hex, using lower-case letters for the digits + * above 9. See the comments for uintToHex. 
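+ *
+ * For example, uintToHexLower(buf, bufLen, 0xbeefU) writes "beef" into
+ * buf[bufLen - 4, bufLen) and returns bufLen - 4.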
+ */ +template +inline size_t uintToHexLower(char* buffer, size_t bufLen, Uint v) { + return uintToHex(buffer, bufLen, v, formatHexLower); +} + +/** + * Convert an unsigned to hex, using upper-case letters for the digits + * above 9. See the comments for uintToHex. + */ +template +inline size_t uintToHexUpper(char* buffer, size_t bufLen, Uint v) { + return uintToHex(buffer, bufLen, v, formatHexUpper); +} + +/** + * Convert an unsigned to octal. + * + * Just like folly::detail::uintToBuffer in Conv.h, writes at the *end* of + * the supplied buffer and returns the offset of the beginning of the string + * from the start of the buffer. The formatted string will be in range + * [buf+begin, buf+bufLen). + */ +template +size_t uintToOctal(char* buffer, size_t bufLen, Uint v) { + auto& repr = formatOctal; + for (; v >= 512; v >>= 9) { + auto b = v & 0x1ff; + bufLen -= 3; + buffer[bufLen] = repr[b][0]; + buffer[bufLen + 1] = repr[b][1]; + buffer[bufLen + 2] = repr[b][2]; + } + buffer[--bufLen] = repr[v][2]; + if (v >= 8) { + buffer[--bufLen] = repr[v][1]; + } + if (v >= 64) { + buffer[--bufLen] = repr[v][0]; + } + return bufLen; +} + +/** + * Convert an unsigned to binary. + * + * Just like folly::detail::uintToBuffer in Conv.h, writes at the *end* of + * the supplied buffer and returns the offset of the beginning of the string + * from the start of the buffer. The formatted string will be in range + * [buf+begin, buf+bufLen). + */ +template +size_t uintToBinary(char* buffer, size_t bufLen, Uint v) { + auto& repr = formatBinary; + if (v == 0) { + buffer[--bufLen] = '0'; + return bufLen; + } + for (; v; v >>= 8) { + auto b = v & 0xff; + bufLen -= 8; + memcpy(buffer + bufLen, &(repr[b][0]), 8); + } + while (buffer[bufLen] == '0') { + ++bufLen; + } + return bufLen; +} + +} // namespace detail + + +template +Formatter::Formatter(StringPiece str, Args&&... args) + : str_(str), + values_(FormatValue::type>( + std::forward(args))...) 
{ + static_assert(!containerMode || sizeof...(Args) == 1, + "Exactly one argument required in container mode"); +} + +template +template +void Formatter::operator()(Output& out) const { + auto p = str_.begin(); + auto end = str_.end(); + + // Copy raw string (without format specifiers) to output; + // not as simple as we'd like, as we still need to translate "}}" to "}" + // and throw if we see any lone "}" + auto outputString = [&out] (StringPiece s) { + auto p = s.begin(); + auto end = s.end(); + while (p != end) { + auto q = static_cast(memchr(p, '}', end - p)); + if (!q) { + out(StringPiece(p, end)); + break; + } + ++q; + out(StringPiece(p, q)); + p = q; + + if (p == end || *p != '}') { + throw std::invalid_argument( + "folly::format: single '}' in format string"); + } + ++p; + } + }; + + int nextArg = 0; + bool hasDefaultArgIndex = false; + bool hasExplicitArgIndex = false; + while (p != end) { + auto q = static_cast(memchr(p, '{', end - p)); + if (!q) { + outputString(StringPiece(p, end)); + break; + } + outputString(StringPiece(p, q)); + p = q + 1; + + if (p == end) { + throw std::invalid_argument( + "folly::format: '}' at end of format string"); + } + + // "{{" -> "{" + if (*p == '{') { + out(StringPiece(p, 1)); + ++p; + continue; + } + + // Format string + q = static_cast(memchr(p, '}', end - p)); + if (q == end) { + throw std::invalid_argument("folly::format: missing ending '}'"); + } + FormatArg arg(StringPiece(p, q)); + p = q + 1; + + int argIndex = 0; + auto piece = arg.splitKey(); // empty key component is okay + if (containerMode) { // static + if (piece.empty()) { + arg.setNextIntKey(nextArg++); + hasDefaultArgIndex = true; + } else { + arg.setNextKey(piece); + hasExplicitArgIndex = true; + } + } else { + if (piece.empty()) { + argIndex = nextArg++; + hasDefaultArgIndex = true; + } else { + try { + argIndex = to(piece); + } catch (const std::out_of_range& e) { + arg.error("argument index must be integer"); + } + arg.enforce(argIndex >= 0, "argument index must be non-negative"); + hasExplicitArgIndex = true; + } + } + + if (hasDefaultArgIndex && hasExplicitArgIndex) { + throw std::invalid_argument( + "folly::format: may not have both default and explicit arg indexes"); + } + + doFormat(argIndex, arg, out); + } +} + +namespace format_value { + +template +void formatString(StringPiece val, FormatArg& arg, FormatCallback& cb) { + if (arg.precision != FormatArg::kDefaultPrecision && + val.size() > arg.precision) { + val.reset(val.data(), arg.precision); + } + + constexpr int padBufSize = 128; + char padBuf[padBufSize]; + + // Output padding, no more than padBufSize at once + auto pad = [&padBuf, &cb, padBufSize] (int chars) { + while (chars) { + int n = std::min(chars, padBufSize); + cb(StringPiece(padBuf, n)); + chars -= n; + } + }; + + int padRemaining = 0; + if (arg.width != FormatArg::kDefaultWidth && val.size() < arg.width) { + char fill = arg.fill == FormatArg::kDefaultFill ? 
' ' : arg.fill; + int padChars = arg.width - val.size(); + memset(padBuf, fill, std::min(padBufSize, padChars)); + + switch (arg.align) { + case FormatArg::Align::DEFAULT: + case FormatArg::Align::LEFT: + padRemaining = padChars; + break; + case FormatArg::Align::CENTER: + pad(padChars / 2); + padRemaining = padChars - padChars / 2; + break; + case FormatArg::Align::RIGHT: + case FormatArg::Align::PAD_AFTER_SIGN: + pad(padChars); + break; + default: + abort(); + break; + } + } + + cb(val); + + if (padRemaining) { + pad(padRemaining); + } +} + +template +void formatNumber(StringPiece val, int prefixLen, FormatArg& arg, + FormatCallback& cb) { + // precision means something different for numbers + arg.precision = FormatArg::kDefaultPrecision; + if (arg.align == FormatArg::Align::DEFAULT) { + arg.align = FormatArg::Align::RIGHT; + } else if (prefixLen && arg.align == FormatArg::Align::PAD_AFTER_SIGN) { + // Split off the prefix, then do any padding if necessary + cb(val.subpiece(0, prefixLen)); + val.advance(prefixLen); + arg.width = std::max(arg.width - prefixLen, 0); + } + format_value::formatString(val, arg, cb); +} + +template +void formatFormatter(const Formatter& formatter, + FormatArg& arg, + FormatCallback& cb) { + if (arg.width == FormatArg::kDefaultWidth && + arg.precision == FormatArg::kDefaultPrecision) { + // nothing to do + formatter(cb); + } else if (arg.align != FormatArg::Align::LEFT && + arg.align != FormatArg::Align::DEFAULT) { + // We can only avoid creating a temporary string if we align left, + // as we'd need to know the size beforehand otherwise + format_value::formatString(formatter.fbstr(), arg, cb); + } else { + auto fn = [&arg, &cb] (StringPiece sp) mutable { + int sz = static_cast(sp.size()); + if (arg.precision != FormatArg::kDefaultPrecision) { + sz = std::min(arg.precision, sz); + sp.reset(sp.data(), sz); + arg.precision -= sz; + } + if (!sp.empty()) { + cb(sp); + if (arg.width != FormatArg::kDefaultWidth) { + arg.width = std::max(arg.width - sz, 0); + } + } + }; + formatter(fn); + if (arg.width != FormatArg::kDefaultWidth && arg.width != 0) { + // Rely on formatString to do appropriate padding + format_value::formatString(StringPiece(), arg, cb); + } + } +} + +} // namespace format_value + +// Definitions for default FormatValue classes + +// Integral types (except bool) +template +class FormatValue< + T, typename std::enable_if< + std::is_integral::value && + !std::is_same::value>::type> + { + public: + explicit FormatValue(T val) : val_(val) { } + template + void format(FormatArg& arg, FormatCallback& cb) const { + arg.validate(FormatArg::Type::INTEGER); + doFormat(arg, cb); + } + + template + void doFormat(FormatArg& arg, FormatCallback& cb) const { + char presentation = arg.presentation; + if (presentation == FormatArg::kDefaultPresentation) { + presentation = std::is_same::value ? 'c' : 'd'; + } + + // Do all work as unsigned, we'll add the prefix ('0' or '0x' if necessary) + // and sign ourselves. 
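+    // For example (a sketch): -42 with specifier "#x" yields uval = 42
+    // and sign = '-'; the hex digits, the "0x" prefix, and the sign are
+    // then written back-to-front into valBuf, producing "-0x2a".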
+ typedef typename std::make_unsigned::type UT; + UT uval; + char sign; + if (std::is_signed::value) { + if (val_ < 0) { + uval = static_cast(-val_); + sign = '-'; + } else { + uval = static_cast(val_); + switch (arg.sign) { + case FormatArg::Sign::PLUS_OR_MINUS: + sign = '+'; + break; + case FormatArg::Sign::SPACE_OR_MINUS: + sign = ' '; + break; + default: + sign = '\0'; + break; + } + } + } else { + uval = val_; + sign = '\0'; + + arg.enforce(arg.sign == FormatArg::Sign::DEFAULT, + "sign specifications not allowed for unsigned values"); + } + + // max of: + // #x: 0x prefix + 16 bytes = 18 bytes + // #o: 0 prefix + 22 bytes = 23 bytes + // #b: 0b prefix + 64 bytes = 65 bytes + // ,d: 26 bytes (including thousands separators!) + // + nul terminator + // + 3 for sign and prefix shenanigans (see below) + constexpr size_t valBufSize = 69; + char valBuf[valBufSize]; + char* valBufBegin = nullptr; + char* valBufEnd = nullptr; + + // Defer to sprintf + auto useSprintf = [&] (const char* format) mutable { + valBufBegin = valBuf + 3; // room for sign and base prefix + valBufEnd = valBufBegin + sprintf(valBufBegin, format, + static_cast(uval)); + }; + + int prefixLen = 0; + + switch (presentation) { + case 'n': // TODO(tudorb): locale awareness? + case 'd': + arg.enforce(!arg.basePrefix, + "base prefix not allowed with '", presentation, + "' specifier"); + if (arg.thousandsSeparator) { + useSprintf("%'ju"); + } else { + // Use uintToBuffer, faster than sprintf + valBufEnd = valBuf + valBufSize - 1; + valBufBegin = valBuf + detail::uintToBuffer(valBuf, valBufSize - 1, + uval); + } + break; + case 'c': + arg.enforce(!arg.basePrefix, + "base prefix not allowed with '", presentation, + "' specifier"); + arg.enforce(!arg.thousandsSeparator, + "thousands separator (',') not allowed with '", + presentation, "' specifier"); + valBufBegin = valBuf + 3; + *valBufBegin = static_cast(uval); + valBufEnd = valBufBegin + 1; + break; + case 'o': + case 'O': + arg.enforce(!arg.thousandsSeparator, + "thousands separator (',') not allowed with '", + presentation, "' specifier"); + valBufEnd = valBuf + valBufSize - 1; + valBufBegin = valBuf + detail::uintToOctal(valBuf, valBufSize - 1, uval); + if (arg.basePrefix) { + *--valBufBegin = '0'; + prefixLen = 1; + } + break; + case 'x': + arg.enforce(!arg.thousandsSeparator, + "thousands separator (',') not allowed with '", + presentation, "' specifier"); + valBufEnd = valBuf + valBufSize - 1; + valBufBegin = valBuf + detail::uintToHexLower(valBuf, valBufSize - 1, + uval); + if (arg.basePrefix) { + *--valBufBegin = 'x'; + *--valBufBegin = '0'; + prefixLen = 2; + } + break; + case 'X': + arg.enforce(!arg.thousandsSeparator, + "thousands separator (',') not allowed with '", + presentation, "' specifier"); + valBufEnd = valBuf + valBufSize - 1; + valBufBegin = valBuf + detail::uintToHexUpper(valBuf, valBufSize - 1, + uval); + if (arg.basePrefix) { + *--valBufBegin = 'X'; + *--valBufBegin = '0'; + prefixLen = 2; + } + break; + case 'b': + case 'B': + arg.enforce(!arg.thousandsSeparator, + "thousands separator (',') not allowed with '", + presentation, "' specifier"); + valBufEnd = valBuf + valBufSize - 1; + valBufBegin = valBuf + detail::uintToBinary(valBuf, valBufSize - 1, + uval); + if (arg.basePrefix) { + *--valBufBegin = presentation; // 0b or 0B + *--valBufBegin = '0'; + prefixLen = 2; + } + break; + default: + arg.error("invalid specifier '", presentation, "'"); + } + + if (sign) { + *--valBufBegin = sign; + ++prefixLen; + } + + 
format_value::formatNumber(StringPiece(valBufBegin, valBufEnd), prefixLen, + arg, cb); + } + + private: + T val_; +}; + +// Bool +template <> +class FormatValue { + public: + explicit FormatValue(bool val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + if (arg.presentation == FormatArg::kDefaultPresentation) { + arg.validate(FormatArg::Type::OTHER); + format_value::formatString(val_ ? "true" : "false", arg, cb); + } else { // number + FormatValue(val_).format(arg, cb); + } + } + + private: + bool val_; +}; + +// double +template <> +class FormatValue { + public: + explicit FormatValue(double val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + using ::double_conversion::DoubleToStringConverter; + using ::double_conversion::StringBuilder; + + arg.validate(FormatArg::Type::FLOAT); + + if (arg.presentation == FormatArg::kDefaultPresentation) { + arg.presentation = 'g'; + } + + const char* infinitySymbol = isupper(arg.presentation) ? "INF" : "inf"; + const char* nanSymbol = isupper(arg.presentation) ? "NAN" : "nan"; + char exponentSymbol = isupper(arg.presentation) ? 'E' : 'e'; + + if (arg.precision == FormatArg::kDefaultPrecision) { + arg.precision = 6; + } + + bool done = false; + + // 2+: for null terminator and optional sign shenanigans. + char buf[2 + std::max({ + (2 + DoubleToStringConverter::kMaxFixedDigitsBeforePoint + + DoubleToStringConverter::kMaxFixedDigitsAfterPoint), + (8 + DoubleToStringConverter::kMaxExponentialDigits), + (7 + DoubleToStringConverter::kMaxPrecisionDigits)})]; + StringBuilder builder(buf + 1, sizeof(buf) - 1); + + char plusSign; + switch (arg.sign) { + case FormatArg::Sign::PLUS_OR_MINUS: + plusSign = '+'; + break; + case FormatArg::Sign::SPACE_OR_MINUS: + plusSign = ' '; + break; + default: + plusSign = '\0'; + break; + }; + + double val = val_; + switch (arg.presentation) { + case '%': + val *= 100; + case 'f': + case 'F': + { + if (arg.precision > + DoubleToStringConverter::kMaxFixedDigitsAfterPoint) { + arg.precision = DoubleToStringConverter::kMaxFixedDigitsAfterPoint; + } + DoubleToStringConverter conv( + DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + infinitySymbol, + nanSymbol, + exponentSymbol, + -4, arg.precision, + 0, 0); + arg.enforce(conv.ToFixed(val, arg.precision, &builder), + "fixed double conversion failed"); + } + break; + case 'e': + case 'E': + { + if (arg.precision > DoubleToStringConverter::kMaxExponentialDigits) { + arg.precision = DoubleToStringConverter::kMaxExponentialDigits; + } + + DoubleToStringConverter conv( + DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + infinitySymbol, + nanSymbol, + exponentSymbol, + -4, arg.precision, + 0, 0); + CHECK(conv.ToExponential(val, arg.precision, &builder)); + } + break; + case 'n': // should be locale-aware, but isn't + case 'g': + case 'G': + { + if (arg.precision < DoubleToStringConverter::kMinPrecisionDigits) { + arg.precision = DoubleToStringConverter::kMinPrecisionDigits; + } else if (arg.precision > + DoubleToStringConverter::kMaxPrecisionDigits) { + arg.precision = DoubleToStringConverter::kMaxPrecisionDigits; + } + DoubleToStringConverter conv( + DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + infinitySymbol, + nanSymbol, + exponentSymbol, + -4, arg.precision, + 0, 0); + CHECK(conv.ToShortest(val, &builder)); + } + break; + default: + arg.error("invalid specifier '", arg.presentation, "'"); + } + + int len = builder.position(); + builder.Finalize(); + DCHECK_GT(len, 0); + + // Add '+' 
or ' ' sign if needed + char* p = buf + 1; + // anything that's neither negative nor nan + int prefixLen = 0; + if (plusSign && (*p != '-' && *p != 'n' && *p != 'N')) { + *--p = plusSign; + ++len; + prefixLen = 1; + } else if (*p == '-') { + prefixLen = 1; + } + + format_value::formatNumber(StringPiece(p, len), prefixLen, arg, cb); + } + + private: + double val_; +}; + +// float (defer to double) +template <> +class FormatValue { + public: + explicit FormatValue(float val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + FormatValue(val_).format(arg, cb); + } + + private: + float val_; +}; + +// Sring-y types (implicitly convertible to StringPiece, except char*) +template +class FormatValue< + T, typename std::enable_if< + (!std::is_pointer::value || + !std::is_same::type>::type>::value) && + std::is_convertible::value>::type> + { + public: + explicit FormatValue(StringPiece val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + if (arg.keyEmpty()) { + arg.validate(FormatArg::Type::OTHER); + arg.enforce(arg.presentation == FormatArg::kDefaultPresentation || + arg.presentation == 's', + "invalid specifier '", arg.presentation, "'"); + format_value::formatString(val_, arg, cb); + } else { + FormatValue(val_.at(arg.splitIntKey())).format(arg, cb); + } + } + + private: + StringPiece val_; +}; + +// Null +template <> +class FormatValue { + public: + explicit FormatValue(std::nullptr_t) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + arg.validate(FormatArg::Type::OTHER); + arg.enforce(arg.presentation == FormatArg::kDefaultPresentation, + "invalid specifier '", arg.presentation, "'"); + format_value::formatString("(null)", arg, cb); + } +}; + +// Partial specialization of FormatValue for char* +template +class FormatValue< + T*, + typename std::enable_if< + std::is_same::type>::value>::type> + { + public: + explicit FormatValue(T* val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + if (arg.keyEmpty()) { + if (!val_) { + FormatValue(nullptr).format(arg, cb); + } else { + FormatValue(val_).format(arg, cb); + } + } else { + FormatValue::type>( + val_[arg.splitIntKey()]).format(arg, cb); + } + } + + private: + T* val_; +}; + +// Partial specialization of FormatValue for void* +template +class FormatValue< + T*, + typename std::enable_if< + std::is_same::type>::value>::type> + { + public: + explicit FormatValue(T* val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + if (!val_) { + FormatValue(nullptr).format(arg, cb); + } else { + // Print as a pointer, in hex. 
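+      // For example (a sketch), format("{}", ptr) on a non-null pointer
+      // renders something like "0x7f8e4c001a20".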
+ arg.validate(FormatArg::Type::OTHER); + arg.enforce(arg.presentation == FormatArg::kDefaultPresentation, + "invalid specifier '", arg.presentation, "'"); + arg.basePrefix = true; + arg.presentation = 'x'; + if (arg.align == FormatArg::Align::DEFAULT) { + arg.align = FormatArg::Align::LEFT; + } + FormatValue( + reinterpret_cast(val_)).doFormat(arg, cb); + } + } + + private: + T* val_; +}; + +// Partial specialization of FormatValue for other pointers +template +class FormatValue< + T*, + typename std::enable_if< + !std::is_same::type>::value && + !std::is_same::type>::value>::type> + { + public: + explicit FormatValue(T* val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + if (arg.keyEmpty()) { + FormatValue((void*)val_).format(arg, cb); + } else { + FormatValue::type>( + val_[arg.splitIntKey()]).format(arg, cb); + } + } + private: + T* val_; +}; + +namespace detail { + +// Shortcut, so we don't have to use enable_if everywhere +struct FormatTraitsBase { + typedef void enabled; +}; + +// Traits that define enabled, value_type, and at() for anything +// indexable with integral keys: pointers, arrays, vectors, and maps +// with integral keys +template struct IndexableTraits; + +// Base class for sequences (vectors, deques) +template +struct IndexableTraitsSeq : public FormatTraitsBase { + typedef C container_type; + typedef typename C::value_type value_type; + static const value_type& at(const C& c, int idx) { + return c.at(idx); + } +}; + +// Base class for associative types (maps) +template +struct IndexableTraitsAssoc : public FormatTraitsBase { + typedef typename C::value_type::second_type value_type; + static const value_type& at(const C& c, int idx) { + return c.at(static_cast(idx)); + } +}; + +// std::array +template +struct IndexableTraits> + : public IndexableTraitsSeq> { +}; + +// std::vector +template +struct IndexableTraits> + : public IndexableTraitsSeq> { +}; + +// std::deque +template +struct IndexableTraits> + : public IndexableTraitsSeq> { +}; + +// fbvector +template +struct IndexableTraits> + : public IndexableTraitsSeq> { +}; + +// small_vector +template +struct IndexableTraits> + : public IndexableTraitsSeq> { +}; + +// std::map with integral keys +template +struct IndexableTraits< + std::map, + typename std::enable_if::value>::type> + : public IndexableTraitsAssoc> { +}; + +// std::unordered_map with integral keys +template +struct IndexableTraits< + std::unordered_map, + typename std::enable_if::value>::type> + : public IndexableTraitsAssoc> { +}; + +} // namespace detail + +// Partial specialization of FormatValue for integer-indexable containers +template +class FormatValue< + T, + typename detail::IndexableTraits::enabled> { + public: + explicit FormatValue(const T& val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + FormatValue::value_type>::type>( + detail::IndexableTraits::at( + val_, arg.splitIntKey())).format(arg, cb); + } + + private: + const T& val_; +}; + +namespace detail { + +// Define enabled, key_type, convert from StringPiece to the key types +// that we support +template struct KeyFromStringPiece; + +// std::string +template <> +struct KeyFromStringPiece : public FormatTraitsBase { + typedef std::string key_type; + static std::string convert(StringPiece s) { + return s.toString(); + } + typedef void enabled; +}; + +// fbstring +template <> +struct KeyFromStringPiece : public FormatTraitsBase { + typedef fbstring key_type; + static fbstring convert(StringPiece s) { + 
return s.toFbstring(); + } +}; + +// StringPiece +template <> +struct KeyFromStringPiece : public FormatTraitsBase { + typedef StringPiece key_type; + static StringPiece convert(StringPiece s) { + return s; + } +}; + +// Base class for associative types keyed by strings +template struct KeyableTraitsAssoc : public FormatTraitsBase { + typedef typename T::key_type key_type; + typedef typename T::value_type::second_type value_type; + static const value_type& at(const T& map, StringPiece key) { + return map.at(KeyFromStringPiece::convert(key)); + } +}; + +// Define enabled, key_type, value_type, at() for supported string-keyed +// types +template struct KeyableTraits; + +// std::map with string key +template +struct KeyableTraits< + std::map, + typename KeyFromStringPiece::enabled> + : public KeyableTraitsAssoc> { +}; + +// std::unordered_map with string key +template +struct KeyableTraits< + std::unordered_map, + typename KeyFromStringPiece::enabled> + : public KeyableTraitsAssoc> { +}; + +} // namespace detail + +// Partial specialization of FormatValue for string-keyed containers +template +class FormatValue< + T, + typename detail::KeyableTraits::enabled> { + public: + explicit FormatValue(const T& val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + FormatValue::value_type>::type>( + detail::KeyableTraits::at( + val_, arg.splitKey())).format(arg, cb); + } + + private: + const T& val_; +}; + +// Partial specialization of FormatValue for pairs +template +class FormatValue> { + public: + explicit FormatValue(const std::pair& val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + int key = arg.splitIntKey(); + switch (key) { + case 0: + FormatValue::type>(val_.first).format(arg, cb); + break; + case 1: + FormatValue::type>(val_.second).format(arg, cb); + break; + default: + arg.error("invalid index for pair"); + } + } + + private: + const std::pair& val_; +}; + +// Partial specialization of FormatValue for tuples +template +class FormatValue> { + typedef std::tuple Tuple; + public: + explicit FormatValue(const Tuple& val) : val_(val) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + int key = arg.splitIntKey(); + arg.enforce(key >= 0, "tuple index must be non-negative"); + doFormat(key, arg, cb); + } + + private: + static constexpr size_t valueCount = std::tuple_size::value; + + template + typename std::enable_if::type + doFormatFrom(size_t i, FormatArg& arg, Callback& cb) const { + arg.enforce("tuple index out of range, max=", i); + } + + template + typename std::enable_if<(K < valueCount)>::type + doFormatFrom(size_t i, FormatArg& arg, Callback& cb) const { + if (i == K) { + FormatValue::type>::type>( + std::get(val_)).format(arg, cb); + } else { + doFormatFrom(i, arg, cb); + } + } + + template + void doFormat(size_t i, FormatArg& arg, Callback& cb) const { + return doFormatFrom<0>(i, arg, cb); + } + + const Tuple& val_; +}; + +/** + * Formatter objects can be appended to strings, and therefore they're + * compatible with folly::toAppend and folly::to. + */ +template +typename std::enable_if< + detail::IsSomeString::value>::type +toAppend(const Formatter& value, Tgt * result) { + value.appendTo(*result); +} + +} // namespace folly diff --git a/folly/Format.cpp b/folly/Format.cpp new file mode 100644 index 00000000..460f7489 --- /dev/null +++ b/folly/Format.cpp @@ -0,0 +1,137 @@ +/* + * Copyright 2012 Facebook, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Format.h" + +namespace folly { +namespace detail { + +extern const FormatArg::Align formatAlignTable[]; +extern const FormatArg::Sign formatSignTable[]; + +} // namespace detail + +using namespace folly::detail; + +void FormatArg::initSlow() { + auto b = fullArgString.begin(); + auto end = fullArgString.end(); + + // Parse key + auto p = static_cast(memchr(b, ':', end - b)); + if (!p) { + key_ = StringPiece(b, end); + return; + } + key_ = StringPiece(b, p); + + if (*p == ':') { + // parse format spec + if (++p == end) return; + + // fill/align, or just align + Align a; + if (p + 1 != end && + (a = formatAlignTable[static_cast(p[1])]) != + Align::INVALID) { + fill = *p; + align = a; + p += 2; + if (p == end) return; + } else if ((a = formatAlignTable[static_cast(*p)]) != + Align::INVALID) { + align = a; + if (++p == end) return; + } + + Sign s; + unsigned char uSign = static_cast(*p); + if ((s = formatSignTable[uSign]) != Sign::INVALID) { + sign = s; + if (++p == end) return; + } + + if (*p == '#') { + basePrefix = true; + if (++p == end) return; + } + + if (*p == '0') { + enforce(align == Align::DEFAULT, "alignment specified twice"); + fill = '0'; + align = Align::PAD_AFTER_SIGN; + if (++p == end) return; + } + + if (*p >= '0' && *p <= '9') { + auto b = p; + do { + ++p; + } while (p != end && *p >= '0' && *p <= '9'); + width = to(StringPiece(b, p)); + + if (p == end) return; + } + + if (*p == ',') { + thousandsSeparator = true; + if (++p == end) return; + } + + if (*p == '.') { + auto b = ++p; + while (p != end && *p >= '0' && *p <= '9') { + ++p; + } + precision = to(StringPiece(b, p)); + + if (p == end) return; + } + + presentation = *p; + if (++p == end) return; + } + + error("extra characters in format string"); +} + +void FormatArg::validate(Type type) const { + enforce(keyEmpty(), "index not allowed"); + switch (type) { + case Type::INTEGER: + enforce(precision == kDefaultPrecision, + "precision not allowed on integers"); + break; + case Type::FLOAT: + enforce(!basePrefix, + "base prefix ('#') specifier only allowed on integers"); + enforce(!thousandsSeparator, + "thousands separator (',') only allowed on integers"); + break; + case Type::OTHER: + enforce(align != Align::PAD_AFTER_SIGN, + "'='alignment only allowed on numbers"); + enforce(sign == Sign::DEFAULT, + "sign specifier only allowed on numbers"); + enforce(!basePrefix, + "base prefix ('#') specifier only allowed on integers"); + enforce(!thousandsSeparator, + "thousands separator (',') only allowed on integers"); + break; + } +} + +} // namespace folly diff --git a/folly/Format.h b/folly/Format.h new file mode 100644 index 00000000..1f5a58d1 --- /dev/null +++ b/folly/Format.h @@ -0,0 +1,272 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_FORMAT_H_ +#define FOLLY_FORMAT_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "folly/FBVector.h" +#include "folly/Conv.h" +#include "folly/Range.h" +#include "folly/Likely.h" +#include "folly/String.h" +#include "folly/small_vector.h" +#include "folly/FormatArg.h" + +namespace folly { + +// forward declarations +template class Formatter; +template +Formatter format(StringPiece fmt, Args&&... args); +template +Formatter vformat(StringPiece fmt, C&& container); +template class FormatValue; + +/** + * Formatter class. + * + * Note that this class is tricky, as it keeps *references* to its arguments + * (and doesn't copy the passed-in format string). Thankfully, you can't use + * this directly, you have to use format(...) below. + */ + +template +class Formatter { + template + friend Formatter format(StringPiece fmt, A&&... arg); + template + friend Formatter vformat(StringPiece fmt, C&& container); + public: + /** + * Append to output. out(StringPiece sp) may be called (more than once) + */ + template + void operator()(Output& out) const; + + /** + * Append to a string. + */ + template + typename std::enable_if::value>::type + appendTo(Str& str) const { + auto appender = [&str] (StringPiece s) { str.append(s.data(), s.size()); }; + (*this)(appender); + } + + /** + * Conversion to string + */ + std::string str() const { + std::string s; + appendTo(s); + return s; + } + + /** + * Conversion to fbstring + */ + fbstring fbstr() const { + fbstring s; + appendTo(s); + return s; + } + + private: + explicit Formatter(StringPiece str, Args&&... args); + + // Not copyable + Formatter(const Formatter&) = delete; + Formatter& operator=(const Formatter&) = delete; + + // Movable, but the move constructor and assignment operator are private, + // for the exclusive use of format() (below). This way, you can't create + // a Formatter object, but can handle references to it (for streaming, + // conversion to string, etc) -- which is good, as Formatter objects are + // dangerous (they hold references, possibly to temporaries) + Formatter(Formatter&&) = default; + Formatter& operator=(Formatter&&) = default; + + typedef std::tuple::type>...> ValueTuple; + static constexpr size_t valueCount = std::tuple_size::value; + + template + typename std::enable_if::type + doFormatFrom(size_t i, FormatArg& arg, Callback& cb) const { + arg.error("argument index out of range, max=", i); + } + + template + typename std::enable_if<(K < valueCount)>::type + doFormatFrom(size_t i, FormatArg& arg, Callback& cb) const { + if (i == K) { + std::get(values_).format(arg, cb); + } else { + doFormatFrom(i, arg, cb); + } + } + + template + void doFormat(size_t i, FormatArg& arg, Callback& cb) const { + return doFormatFrom<0>(i, arg, cb); + } + + bool containerMode_; + StringPiece str_; + ValueTuple values_; +}; + +/** + * Formatter objects can be written to streams. 
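+ *
+ * For example:
+ *
+ *   std::cout << format("{} {}", 23, 42);  // prints "23 42"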
+ */ +template +std::ostream& operator<<(std::ostream& out, + const Formatter& formatter) { + auto writer = [&out] (StringPiece sp) { out.write(sp.data(), sp.size()); }; + formatter(writer); + return out; +} + +/** + * Create a formatter object. + * + * std::string formatted = format("{} {}", 23, 42); + * LOG(INFO) << format("{} {}", 23, 42); + */ +template +Formatter format(StringPiece fmt, Args&&... args) { + return Formatter( + fmt, std::forward(args)...); +} + +/** + * Create a formatter object that takes one argument (of container type) + * and uses that container to get argument values from. + * + * std::map map { {"hello", "world"}, {"answer", "42"} }; + * + * The following are equivalent: + * format("{0[hello]} {0[answer]}", map); + * + * vformat("{hello} {answer}", map); + * + * but the latter is cleaner. + */ +template +Formatter vformat(StringPiece fmt, Container&& container) { + return Formatter( + fmt, std::forward(container)); +} + +/** + * Append formatted output to a string. + * + * std::string foo; + * format(&foo, "{} {}", 42, 23); + * + * Shortcut for toAppend(format(...), &foo); + */ +template +typename std::enable_if::value>::type +format(Str* out, StringPiece fmt, Args&&... args) { + format(fmt, std::forward(args)...).appendTo(*out); +} + +/** + * Append vformatted output to a string. + */ +template +typename std::enable_if::value>::type +vformat(Str* out, StringPiece fmt, Container&& container) { + vformat(fmt, std::forward(container)).appendTo(*out); +} + +/** + * Utilities for all format value specializations. + */ +namespace format_value { + +/** + * Format a string in "val", obeying appropriate alignment, padding, width, + * and precision. Treats Align::DEFAULT as Align::LEFT, and + * Align::PAD_AFTER_SIGN as Align::RIGHT; use formatNumber for + * number-specific formatting. + */ +template +void formatString(StringPiece val, FormatArg& arg, FormatCallback& cb); + +/** + * Format a number in "val"; the first prefixLen characters form the prefix + * (sign, "0x" base prefix, etc) which must be left-aligned if the alignment + * is Align::PAD_AFTER_SIGN. Treats Align::DEFAULT as Align::LEFT. Ignores + * arg.precision, as that has a different meaning for numbers (not "maximum + * field width") + */ +template +void formatNumber(StringPiece val, int prefixLen, FormatArg& arg, + FormatCallback& cb); + + +/** + * Format a Formatter object recursively. Behaves just like + * formatString(fmt.str(), arg, cb); but avoids creating a temporary + * string if possible. + */ +template +void formatFormatter(const Formatter& formatter, + FormatArg& arg, + FormatCallback& cb); + +} // namespace format_value + +/* + * Specialize folly::FormatValue for your type. + * + * FormatValue is constructed with a (reference-collapsed) T&&, which is + * guaranteed to stay alive until the FormatValue object is destroyed, so you + * may keep a reference (or pointer) to it instead of making a copy. + * + * You must define + * template + * void format(FormatArg& arg, Callback& cb) const; + * with the following semantics: format the value using the given argument. + * + * arg is given by non-const reference for convenience -- it won't be reused, + * so feel free to modify it in place if necessary. (For example, wrap an + * existing conversion but change the default, or remove the "key" when + * extracting an element from a container) + * + * Call the callback to append data to the output. 
You may call the callback + * as many times as you'd like (or not at all, if you want to output an + * empty string) + */ + +} // namespace folly + +#include "folly/Format-inl.h" + +#endif /* FOLLY_FORMAT_H_ */ + diff --git a/folly/FormatArg.h b/folly/FormatArg.h new file mode 100644 index 00000000..99658121 --- /dev/null +++ b/folly/FormatArg.h @@ -0,0 +1,255 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_FORMATARG_H_ +#define FOLLY_FORMATARG_H_ + +#include +#include "folly/Range.h" +#include "folly/Likely.h" +#include "folly/Conv.h" + +namespace folly { + +/** + * Parsed format argument. + */ +struct FormatArg { + /** + * Parse a format argument from a string. Keeps a reference to the + * passed-in string -- does not copy the given characters. + */ + explicit FormatArg(StringPiece sp) + : fullArgString(sp), + fill(kDefaultFill), + align(Align::DEFAULT), + sign(Sign::DEFAULT), + basePrefix(false), + thousandsSeparator(false), + width(kDefaultWidth), + precision(kDefaultPrecision), + presentation(kDefaultPresentation), + nextKeyMode_(NextKeyMode::NONE) { + if (!sp.empty()) { + initSlow(); + } + } + + enum class Type { + INTEGER, + FLOAT, + OTHER + }; + /** + * Validate the argument for the given type; throws on error. + */ + void validate(Type type) const; + + /** + * Throw an exception if the first argument is false. The exception + * message will contain the argument string as well as any passed-in + * arguments to enforce, formatted using folly::to. + */ + template + void enforce(bool v, Args&&... args) const { + if (UNLIKELY(!v)) { + error(std::forward(args)...); + } + } + + template + void error(Args&&... args) const __attribute__((noreturn)); + /** + * Full argument string, as passed in to the constructor. + */ + StringPiece fullArgString; + + /** + * Fill + */ + static constexpr char kDefaultFill = '\0'; + char fill; + + /** + * Alignment + */ + enum class Align : uint8_t { + DEFAULT, + LEFT, + RIGHT, + PAD_AFTER_SIGN, + CENTER, + INVALID + }; + Align align; + + /** + * Sign + */ + enum class Sign : uint8_t { + DEFAULT, + PLUS_OR_MINUS, + MINUS, + SPACE_OR_MINUS, + INVALID + }; + Sign sign; + + /** + * Output base prefix (0 for octal, 0x for hex) + */ + bool basePrefix; + + /** + * Output thousands separator (comma) + */ + bool thousandsSeparator; + + /** + * Field width + */ + static constexpr int kDefaultWidth = -1; + int width; + + /** + * Precision + */ + static constexpr int kDefaultPrecision = -1; + int precision; + + /** + * Presentation + */ + static constexpr char kDefaultPresentation = '\0'; + char presentation; + + /** + * Split a key component from "key", which must be non-empty (an exception + * is thrown otherwise). + */ + template + StringPiece splitKey(); + + /** + * Is the entire key empty? 
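+ *
+ * (For example, the key is empty in "{}" and "{:<10}", but not in
+ * "{0}" or "{name}".)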
+ */ + bool keyEmpty() const { + return nextKeyMode_ == NextKeyMode::NONE && key_.empty(); + } + + /** + * Split an key component from "key", which must be non-empty and a valid + * integer (an exception is thrown otherwise). + */ + int splitIntKey(); + + void setNextIntKey(int val) { + assert(nextKeyMode_ == NextKeyMode::NONE); + nextKeyMode_ = NextKeyMode::INT; + nextIntKey_ = val; + } + + void setNextKey(StringPiece val) { + assert(nextKeyMode_ == NextKeyMode::NONE); + nextKeyMode_ = NextKeyMode::STRING; + nextKey_ = val; + } + + private: + void initSlow(); + template + StringPiece doSplitKey(); + + StringPiece key_; + int nextIntKey_; + StringPiece nextKey_; + enum class NextKeyMode { + NONE, + INT, + STRING, + }; + NextKeyMode nextKeyMode_; +}; + +template +inline void FormatArg::error(Args&&... args) const { + throw std::invalid_argument(to( + "folly::format: invalid format argument {", fullArgString, "}: ", + std::forward(args)...)); +} + +template +inline StringPiece FormatArg::splitKey() { + enforce(nextKeyMode_ != NextKeyMode::INT, "integer key expected"); + return doSplitKey(); +} + +template +inline StringPiece FormatArg::doSplitKey() { + if (nextKeyMode_ == NextKeyMode::STRING) { + nextKeyMode_ = NextKeyMode::NONE; + if (!emptyOk) { // static + enforce(!nextKey_.empty(), "non-empty key required"); + } + return nextKey_; + } + + if (key_.empty()) { + if (!emptyOk) { // static + error("non-empty key required"); + } + return StringPiece(); + } + + const char* b = key_.begin(); + const char* e = key_.end(); + const char* p; + if (e[-1] == ']') { + --e; + p = static_cast(memchr(b, '[', e - b)); + enforce(p, "unmatched ']'"); + } else { + p = static_cast(memchr(b, '.', e - b)); + } + if (p) { + key_.assign(p + 1, e); + } else { + p = e; + key_.clear(); + } + if (!emptyOk) { // static + enforce(b != p, "non-empty key required"); + } + return StringPiece(b, p); +} + +inline int FormatArg::splitIntKey() { + if (nextKeyMode_ == NextKeyMode::INT) { + nextKeyMode_ = NextKeyMode::NONE; + return nextIntKey_; + } + try { + return to(doSplitKey()); + } catch (const std::out_of_range& e) { + error("integer key required"); + return 0; // unreached + } +} + +} // namespace folly + +#endif /* FOLLY_FORMATARG_H_ */ + diff --git a/folly/GroupVarint.cpp b/folly/GroupVarint.cpp new file mode 100644 index 00000000..54b9a903 --- /dev/null +++ b/folly/GroupVarint.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/GroupVarint.h" + +namespace folly { + +const uint32_t GroupVarint32::kMask[] = { + 0xff, 0xffff, 0xffffff, 0xffffffff +}; + +const uint64_t GroupVarint64::kMask[] = { + 0xff, 0xffff, 0xffffff, 0xffffffff, + 0xffffffffffULL, 0xffffffffffffULL, 0xffffffffffffffULL, + 0xffffffffffffffffULL +}; + +} // namespace folly + diff --git a/folly/GroupVarint.h b/folly/GroupVarint.h new file mode 100644 index 00000000..0af5cc48 --- /dev/null +++ b/folly/GroupVarint.h @@ -0,0 +1,600 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_GROUPVARINT_H_ +#define FOLLY_GROUPVARINT_H_ + +#ifndef __GNUC__ +#error GroupVarint.h requires GCC +#endif + +#if !defined(__x86_64__) && !defined(__i386__) +#error GroupVarint.h requires x86_64 or i386 +#endif + +#include +#include +#include "folly/detail/GroupVarintDetail.h" +#include "folly/Range.h" +#include + +#ifdef __SSSE3__ +#include +namespace folly { +namespace detail { +extern const __m128i groupVarintSSEMasks[]; +} // namespace detail +} // namespace folly +#endif + +namespace folly { +namespace detail { +extern const uint8_t groupVarintLengths[]; +} // namespace detail +} // namespace folly + +namespace folly { + +template +class GroupVarint; + +/** + * GroupVarint encoding for 32-bit values. + * + * Encodes 4 32-bit integers at once, each using 1-4 bytes depending on size. + * There is one byte of overhead. (The first byte contains the lengths of + * the four integers encoded as two bits each; 00=1 byte .. 11=4 bytes) + * + * This implementation assumes little-endian and does unaligned 32-bit + * accesses, so it's basically not portable outside of the x86[_64] world. + */ +template <> +class GroupVarint : public detail::GroupVarintBase { + public: + + /** + * Return the number of bytes used to encode these four values. + */ + static size_t size(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + return kHeaderSize + kGroupSize + key(a) + key(b) + key(c) + key(d); + } + + /** + * Return the number of bytes used to encode four uint32_t values stored + * at consecutive positions in an array. + */ + static size_t size(const uint32_t* p) { + return size(p[0], p[1], p[2], p[3]); + } + + /** + * Return the number of bytes used to encode count (<= 4) values. + * If you clip a buffer after these many bytes, you can still decode + * the first "count" values correctly (if the remaining size() - + * partialSize() bytes are filled with garbage). + */ + static size_t partialSize(const type* p, size_t count) { + DCHECK_LE(count, kGroupSize); + size_t s = kHeaderSize + count; + for (; count; --count, ++p) { + s += key(*p); + } + return s; + } + + /** + * Return the number of values from *p that are valid from an encoded + * buffer of size bytes. 
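+ *
+ * (A worked example: for a group of four values that each encode in
+ * one byte, size() is 5, and partialCount(p, 3) == 2, since only the
+ * header byte plus the first two one-byte values fit in 3 bytes.)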
+ */ + static size_t partialCount(const char* p, size_t size) { + char v = *p; + size_t s = kHeaderSize; + s += 1 + b0key(v); + if (s > size) return 0; + s += 1 + b1key(v); + if (s > size) return 1; + s += 1 + b2key(v); + if (s > size) return 2; + s += 1 + b3key(v); + if (s > size) return 3; + return 4; + } + + /** + * Given a pointer to the beginning of an GroupVarint32-encoded block, + * return the number of bytes used by the encoding. + */ + static size_t encodedSize(const char* p) { + return (kHeaderSize + kGroupSize + + b0key(*p) + b1key(*p) + b2key(*p) + b3key(*p)); + } + + /** + * Encode four uint32_t values into the buffer pointed-to by p, and return + * the next position in the buffer (that is, one character past the last + * encoded byte). p needs to have at least size()+4 bytes available. + */ + static char* encode(char* p, uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint8_t b0key = key(a); + uint8_t b1key = key(b); + uint8_t b2key = key(c); + uint8_t b3key = key(d); + *p++ = (b3key << 6) | (b2key << 4) | (b1key << 2) | b0key; + *reinterpret_cast(p) = a; + p += b0key+1; + *reinterpret_cast(p) = b; + p += b1key+1; + *reinterpret_cast(p) = c; + p += b2key+1; + *reinterpret_cast(p) = d; + p += b3key+1; + return p; + } + + /** + * Encode four uint32_t values from the array pointed-to by src into the + * buffer pointed-to by p, similar to encode(p,a,b,c,d) above. + */ + static char* encode(char* p, const uint32_t* src) { + return encode(p, src[0], src[1], src[2], src[3]); + } + + /** + * Decode four uint32_t values from a buffer, and return the next position + * in the buffer (that is, one character past the last encoded byte). + * The buffer needs to have at least 3 extra bytes available (they + * may be read but ignored). + */ + static const char* decode_simple(const char* p, uint32_t* a, uint32_t* b, + uint32_t* c, uint32_t* d) { + size_t k = *reinterpret_cast(p); + const char* end = p + detail::groupVarintLengths[k]; + ++p; + size_t k0 = b0key(k); + *a = *reinterpret_cast(p) & kMask[k0]; + p += k0+1; + size_t k1 = b1key(k); + *b = *reinterpret_cast(p) & kMask[k1]; + p += k1+1; + size_t k2 = b2key(k); + *c = *reinterpret_cast(p) & kMask[k2]; + p += k2+1; + size_t k3 = b3key(k); + *d = *reinterpret_cast(p) & kMask[k3]; + p += k3+1; + return end; + } + + /** + * Decode four uint32_t values from a buffer and store them in the array + * pointed-to by dest, similar to decode(p,a,b,c,d) above. 
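+ *
+ * A round-trip sketch (illustration only; kMaxSize, inherited from
+ * GroupVarintBase, is large enough for both the encoder's trailing
+ * overwrite and the decoder's trailing overread):
+ *
+ *   uint32_t in[4] = { 1, 1000, 100000, 10000000 };
+ *   char buf[GroupVarint32::kMaxSize];
+ *   GroupVarint32::encode(buf, in);
+ *   uint32_t out[4];
+ *   GroupVarint32::decode_simple(buf, out);  // out[i] == in[i]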
+ */ + static const char* decode_simple(const char* p, uint32_t* dest) { + return decode_simple(p, dest, dest+1, dest+2, dest+3); + } + +#ifdef __SSSE3__ + static const char* decode(const char* p, uint32_t* dest) { + uint8_t key = p[0]; + __m128i val = _mm_loadu_si128((const __m128i*)(p+1)); + __m128i mask = detail::groupVarintSSEMasks[key]; + __m128i r = _mm_shuffle_epi8(val, mask); + _mm_storeu_si128((__m128i*)dest, r); + return p + detail::groupVarintLengths[key]; + } + + static const char* decode(const char* p, uint32_t* a, uint32_t* b, + uint32_t* c, uint32_t* d) { + uint8_t key = p[0]; + __m128i val = _mm_loadu_si128((const __m128i*)(p+1)); + __m128i mask = detail::groupVarintSSEMasks[key]; + __m128i r = _mm_shuffle_epi8(val, mask); + + // Extracting 32 bits at a time out of an XMM register is a SSE4 feature +#ifdef __SSE4__ + *a = _mm_extract_epi32(r, 0); + *b = _mm_extract_epi32(r, 1); + *c = _mm_extract_epi32(r, 2); + *d = _mm_extract_epi32(r, 3); +#else /* !__SSE4__ */ + *a = _mm_extract_epi16(r, 0) + (_mm_extract_epi16(r, 1) << 16); + *b = _mm_extract_epi16(r, 2) + (_mm_extract_epi16(r, 3) << 16); + *c = _mm_extract_epi16(r, 4) + (_mm_extract_epi16(r, 5) << 16); + *d = _mm_extract_epi16(r, 6) + (_mm_extract_epi16(r, 7) << 16); +#endif /* __SSE4__ */ + + return p + detail::groupVarintLengths[key]; + } + +#else /* !__SSSE3__ */ + static const char* decode(const char* p, uint32_t* a, uint32_t* b, + uint32_t* c, uint32_t* d) { + return decode_simple(p, a, b, c, d); + } + + static const char* decode(const char* p, uint32_t* dest) { + return decode_simple(p, dest); + } +#endif /* __SSSE3__ */ + + private: + static uint8_t key(uint32_t x) { + // __builtin_clz is undefined for the x==0 case + return 3 - (__builtin_clz(x|1) / 8); + } + static size_t b0key(size_t x) { return x & 3; } + static size_t b1key(size_t x) { return (x >> 2) & 3; } + static size_t b2key(size_t x) { return (x >> 4) & 3; } + static size_t b3key(size_t x) { return (x >> 6) & 3; } + + static const uint32_t kMask[]; +}; + + +/** + * GroupVarint encoding for 64-bit values. + * + * Encodes 5 64-bit integers at once, each using 1-8 bytes depending on size. + * There are two bytes of overhead. (The first two bytes contain the lengths + * of the five integers encoded as three bits each; 000=1 byte .. 111 = 8 bytes) + * + * This implementation assumes little-endian and does unaligned 64-bit + * accesses, so it's basically not portable outside of the x86[_64] world. + */ +template <> +class GroupVarint : public detail::GroupVarintBase { + public: + /** + * Return the number of bytes used to encode these five values. + */ + static size_t size(uint64_t a, uint64_t b, uint64_t c, uint64_t d, + uint64_t e) { + return (kHeaderSize + kGroupSize + + key(a) + key(b) + key(c) + key(d) + key(e)); + } + + /** + * Return the number of bytes used to encode five uint64_t values stored + * at consecutive positions in an array. + */ + static size_t size(const uint64_t* p) { + return size(p[0], p[1], p[2], p[3], p[4]); + } + + /** + * Return the number of bytes used to encode count (<= 4) values. + * If you clip a buffer after these many bytes, you can still decode + * the first "count" values correctly (if the remaining size() - + * partialSize() bytes are filled with garbage). 
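+ *
+ * (Note that kGroupSize is 5 for this 64-bit variant, so count may be
+ * at most 5 here.)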
+ */ + static size_t partialSize(const type* p, size_t count) { + DCHECK_LE(count, kGroupSize); + size_t s = kHeaderSize + count; + for (; count; --count, ++p) { + s += key(*p); + } + return s; + } + + /** + * Return the number of values from *p that are valid from an encoded + * buffer of size bytes. + */ + static size_t partialCount(const char* p, size_t size) { + uint16_t v = *reinterpret_cast(p); + size_t s = kHeaderSize; + s += 1 + b0key(v); + if (s > size) return 0; + s += 1 + b1key(v); + if (s > size) return 1; + s += 1 + b2key(v); + if (s > size) return 2; + s += 1 + b3key(v); + if (s > size) return 3; + s += 1 + b4key(v); + if (s > size) return 4; + return 5; + } + + /** + * Given a pointer to the beginning of an GroupVarint64-encoded block, + * return the number of bytes used by the encoding. + */ + static size_t encodedSize(const char* p) { + uint16_t n = *reinterpret_cast(p); + return (kHeaderSize + kGroupSize + + b0key(n) + b1key(n) + b2key(n) + b3key(n) + b4key(n)); + } + + /** + * Encode five uint64_t values into the buffer pointed-to by p, and return + * the next position in the buffer (that is, one character past the last + * encoded byte). p needs to have at least size()+8 bytes available. + */ + static char* encode(char* p, uint64_t a, uint64_t b, uint64_t c, + uint64_t d, uint64_t e) { + uint8_t b0key = key(a); + uint8_t b1key = key(b); + uint8_t b2key = key(c); + uint8_t b3key = key(d); + uint8_t b4key = key(e); + *reinterpret_cast(p) = + (b4key << 12) | (b3key << 9) | (b2key << 6) | (b1key << 3) | b0key; + p += 2; + *reinterpret_cast(p) = a; + p += b0key+1; + *reinterpret_cast(p) = b; + p += b1key+1; + *reinterpret_cast(p) = c; + p += b2key+1; + *reinterpret_cast(p) = d; + p += b3key+1; + *reinterpret_cast(p) = e; + p += b4key+1; + return p; + } + + /** + * Encode five uint64_t values from the array pointed-to by src into the + * buffer pointed-to by p, similar to encode(p,a,b,c,d,e) above. + */ + static char* encode(char* p, const uint64_t* src) { + return encode(p, src[0], src[1], src[2], src[3], src[4]); + } + + /** + * Decode five uint64_t values from a buffer, and return the next position + * in the buffer (that is, one character past the last encoded byte). + * The buffer needs to have at least 7 bytes available (they may be read + * but ignored). + */ + static const char* decode(const char* p, uint64_t* a, uint64_t* b, + uint64_t* c, uint64_t* d, uint64_t* e) { + uint16_t k = *reinterpret_cast(p); + p += 2; + uint8_t k0 = b0key(k); + *a = *reinterpret_cast(p) & kMask[k0]; + p += k0+1; + uint8_t k1 = b1key(k); + *b = *reinterpret_cast(p) & kMask[k1]; + p += k1+1; + uint8_t k2 = b2key(k); + *c = *reinterpret_cast(p) & kMask[k2]; + p += k2+1; + uint8_t k3 = b3key(k); + *d = *reinterpret_cast(p) & kMask[k3]; + p += k3+1; + uint8_t k4 = b4key(k); + *e = *reinterpret_cast(p) & kMask[k4]; + p += k4+1; + return p; + } + + /** + * Decode five uint64_t values from a buffer and store them in the array + * pointed-to by dest, similar to decode(p,a,b,c,d,e) above. 
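+ *
+ * A round-trip sketch, mirroring the 32-bit example (illustration
+ * only):
+ *
+ *   uint64_t in[5] = { 1, 300, 70000, 1ULL << 33, 1ULL << 50 };
+ *   char buf[GroupVarint64::kMaxSize];
+ *   GroupVarint64::encode(buf, in);
+ *   uint64_t out[5];
+ *   GroupVarint64::decode(buf, out);  // out[i] == in[i]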
+ */ + static const char* decode(const char* p, uint64_t* dest) { + return decode(p, dest, dest+1, dest+2, dest+3, dest+4); + } + + private: + enum { kHeaderBytes = 2 }; + + static uint8_t key(uint64_t x) { + // __builtin_clzll is undefined for the x==0 case + return 7 - (__builtin_clzll(x|1) / 8); + } + + static uint8_t b0key(uint16_t x) { return x & 7; } + static uint8_t b1key(uint16_t x) { return (x >> 3) & 7; } + static uint8_t b2key(uint16_t x) { return (x >> 6) & 7; } + static uint8_t b3key(uint16_t x) { return (x >> 9) & 7; } + static uint8_t b4key(uint16_t x) { return (x >> 12) & 7; } + + static const uint64_t kMask[]; +}; + +typedef GroupVarint GroupVarint32; +typedef GroupVarint GroupVarint64; + +/** + * Simplify use of GroupVarint* for the case where data is available one + * entry at a time (instead of one group at a time). Handles buffering + * and an incomplete last chunk. + * + * Output is a function object that accepts character ranges: + * out(StringPiece) appends the given character range to the output. + */ +template +class GroupVarintEncoder { + public: + typedef GroupVarint Base; + typedef T type; + + explicit GroupVarintEncoder(Output out) + : out_(out), + count_(0) { + } + + ~GroupVarintEncoder() { + finish(); + } + + /** + * Add a value to the encoder. + */ + void add(type val) { + buf_[count_++] = val; + if (count_ == Base::kGroupSize) { + char* p = Base::encode(tmp_, buf_); + out_(StringPiece(tmp_, p)); + count_ = 0; + } + } + + /** + * Finish encoding, flushing any buffered values if necessary. + * After finish(), the encoder is immediately ready to encode more data + * to the same output. + */ + void finish() { + if (count_) { + // This is not strictly necessary, but it makes testing easy; + // uninitialized bytes are guaranteed to be recorded as taking one byte + // (not more). + for (size_t i = count_; i < Base::kGroupSize; i++) { + buf_[i] = 0; + } + Base::encode(tmp_, buf_); + out_(StringPiece(tmp_, Base::partialSize(buf_, count_))); + count_ = 0; + } + } + + /** + * Return the appender that was used. + */ + Output& output() { + return out_; + } + const Output& output() const { + return out_; + } + + /** + * Reset the encoder, disregarding any state (except what was already + * flushed to the output, of course). + */ + void clear() { + count_ = 0; + } + + private: + Output out_; + char tmp_[Base::kMaxSize]; + type buf_[Base::kGroupSize]; + size_t count_; +}; + +/** + * Simplify use of GroupVarint* for the case where the last group in the + * input may be incomplete (but the exact size of the input is known). + * Allows for extracting values one at a time. + */ +template +class GroupVarintDecoder { + public: + typedef GroupVarint Base; + typedef T type; + + GroupVarintDecoder() { } + + explicit GroupVarintDecoder(StringPiece data, + size_t maxCount = (size_t)-1) + : p_(data.data()), + end_(data.data() + data.size()), + pos_(0), + count_(0), + remaining_(maxCount) { + } + + void reset(StringPiece data, size_t maxCount=(size_t)-1) { + p_ = data.data(); + end_ = data.data() + data.size(); + pos_ = 0; + count_ = 0; + remaining_ = maxCount; + } + + /** + * Read and return the next value. + */ + bool next(type* val) { + if (pos_ == count_) { + // refill + size_t rem = end_ - p_; + if (rem == 0 || remaining_ == 0) { + return false; + } + // next() attempts to read one full group at a time, and so we must have + // at least enough bytes readable after its end to handle the case if the + // last group is full. 
+ // + // The best way to ensure this is to ensure that data has at least + // Base::kMaxSize - 1 bytes readable *after* the end, otherwise we'll copy + // into a temporary buffer. + if (rem < Base::kMaxSize) { + memcpy(tmp_, p_, rem); + p_ = tmp_; + end_ = p_ + rem; + } + pos_ = 0; + const char* n = Base::decode(p_, buf_); + if (n <= end_) { + // Full group could be decoded + if (remaining_ >= Base::kGroupSize) { + remaining_ -= Base::kGroupSize; + count_ = Base::kGroupSize; + p_ = n; + } else { + count_ = remaining_; + remaining_ = 0; + p_ += Base::partialSize(buf_, count_); + } + } else { + // Can't decode a full group + count_ = Base::partialCount(p_, end_ - p_); + if (remaining_ >= count_) { + remaining_ -= count_; + p_ = end_; + } else { + count_ = remaining_; + remaining_ = 0; + p_ += Base::partialSize(buf_, count_); + } + if (count_ == 0) { + return false; + } + } + } + *val = buf_[pos_++]; + return true; + } + + StringPiece rest() const { + // This is only valid after next() returned false + CHECK(pos_ == count_ && (p_ == end_ || remaining_ == 0)); + return StringPiece(p_, end_ - p_); + } + + private: + const char* p_; + const char* end_; + char tmp_[Base::kMaxSize]; + type buf_[Base::kGroupSize]; + size_t pos_; + size_t count_; + size_t remaining_; +}; + +typedef GroupVarintDecoder GroupVarint32Decoder; +typedef GroupVarintDecoder GroupVarint64Decoder; + +} // namespace folly + +#endif /* FOLLY_GROUPVARINT_H_ */ + diff --git a/folly/Hash.h b/folly/Hash.h new file mode 100644 index 00000000..a52da9c6 --- /dev/null +++ b/folly/Hash.h @@ -0,0 +1,243 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_BASE_HASH_H_ +#define FOLLY_BASE_HASH_H_ + +#include +#include +#include + +/* + * Various hashing functions. 
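+ *
+ * A small usage sketch (illustration only; folly::hasher is the
+ * hashing functor defined at the bottom of this file):
+ *
+ *   uint64_t a = folly::hash::twang_mix64(12345);
+ *   uint32_t b = folly::hash::fnv32("hello");
+ *   size_t c = folly::hasher<uint64_t>()(12345ULL);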
+ */
+
+namespace folly { namespace hash {
+
+//////////////////////////////////////////////////////////////////////
+
+/*
+ * Thomas Wang 64 bit mix hash function
+ */
+
+inline uint64_t twang_mix64(uint64_t key) {
+  key = (~key) + (key << 21);
+  key = key ^ (key >> 24);
+  key = (key + (key << 3)) + (key << 8);
+  key = key ^ (key >> 14);
+  key = (key + (key << 2)) + (key << 4);
+  key = key ^ (key >> 28);
+  key = key + (key << 31);
+  return key;
+}
+
+/*
+ * Thomas Wang downscaling hash function
+ */
+
+inline uint32_t twang_32from64(uint64_t key) {
+  key = (~key) + (key << 18);
+  key = key ^ (key >> 31);
+  key = key * 21;
+  key = key ^ (key >> 11);
+  key = key + (key << 6);
+  key = key ^ (key >> 22);
+  return (uint32_t) key;
+}
+
+/*
+ * Robert Jenkins' reversible 32 bit mix hash function
+ */
+
+inline uint32_t jenkins_rev_mix32(uint32_t key) {
+  key += (key << 12);
+  key ^= (key >> 22);
+  key += (key << 4);
+  key ^= (key >> 9);
+  key += (key << 10);
+  key ^= (key >> 2);
+  key += (key << 7);
+  key += (key << 12);
+  return key;
+}
+
+/*
+ * Fowler / Noll / Vo (FNV) Hash
+ * http://www.isthe.com/chongo/tech/comp/fnv/
+ */
+
+const uint32_t FNV_32_HASH_START = 2166136261UL;
+const uint64_t FNV_64_HASH_START = 14695981039346656037ULL;
+
+inline uint32_t fnv32(const char* s,
+                      uint32_t hash = FNV_32_HASH_START) {
+  for (; *s; ++s) {
+    hash += (hash << 1) + (hash << 4) + (hash << 7) +
+            (hash << 8) + (hash << 24);
+    hash ^= *s;
+  }
+  return hash;
+}
+
+inline uint32_t fnv32_buf(const void* buf,
+                          int n,
+                          uint32_t hash = FNV_32_HASH_START) {
+  const char* char_buf = reinterpret_cast<const char*>(buf);
+
+  for (int i = 0; i < n; ++i) {
+    hash += (hash << 1) + (hash << 4) + (hash << 7) +
+            (hash << 8) + (hash << 24);
+    hash ^= char_buf[i];
+  }
+
+  return hash;
+}
+
+inline uint32_t fnv32(const std::string& str,
+                      uint32_t hash = FNV_32_HASH_START) {
+  return fnv32_buf(str.data(), str.size(), hash);
+}
+
+inline uint64_t fnv64(const char* s,
+                      uint64_t hash = FNV_64_HASH_START) {
+  for (; *s; ++s) {
+    hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
+            (hash << 8) + (hash << 40);
+    hash ^= *s;
+  }
+  return hash;
+}
+
+inline uint64_t fnv64_buf(const void* buf,
+                          int n,
+                          uint64_t hash = FNV_64_HASH_START) {
+  const char* char_buf = reinterpret_cast<const char*>(buf);
+
+  for (int i = 0; i < n; ++i) {
+    hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
+            (hash << 8) + (hash << 40);
+    hash ^= char_buf[i];
+  }
+  return hash;
+}
+
+inline uint64_t fnv64(const std::string& str,
+                      uint64_t hash = FNV_64_HASH_START) {
+  return fnv64_buf(str.data(), str.size(), hash);
+}
+
+/*
+ * Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html
+ */
+
+#define get16bits(d) (*((const uint16_t*) (d)))
+
+inline uint32_t hsieh_hash32_buf(const void* buf, int len) {
+  const char* s = reinterpret_cast<const char*>(buf);
+  uint32_t hash = len;
+  uint32_t tmp;
+  int rem;
+
+  if (len <= 0 || buf == 0) {
+    return 0;
+  }
+
+  rem = len & 3;
+  len >>= 2;
+
+  /* Main loop */
+  for (; len > 0; len--) {
+    hash += get16bits (s);
+    tmp = (get16bits (s+2) << 11) ^ hash;
+    hash = (hash << 16) ^ tmp;
+    s += 2*sizeof (uint16_t);
+    hash += hash >> 11;
+  }
+
+  /* Handle end cases */
+  switch (rem) {
+  case 3:
+    hash += get16bits(s);
+    hash ^= hash << 16;
+    hash ^= s[sizeof (uint16_t)] << 18;
+    hash += hash >> 11;
+    break;
+  case 2:
+    hash += get16bits(s);
+    hash ^= hash << 11;
+    hash += hash >> 17;
+    break;
+  case 1:
+    hash += *s;
+    hash ^= hash << 10;
+    hash += hash >> 1;
+  }
+
+  /* Force "avalanching" of final 127 bits */
+  hash ^= hash << 3;
+  hash += hash >> 5;
+  hash ^= hash << 4;
+  hash += hash >> 17;
+  hash ^= hash << 25;
+  hash += hash >> 6;
+
+  return hash;
+}
+
+#undef get16bits
+
+inline uint32_t hsieh_hash32(const char* s) {
+  return hsieh_hash32_buf(s, std::strlen(s));
+}
+
+inline uint32_t hsieh_hash32_str(const std::string& str) {
+  return hsieh_hash32_buf(str.data(), str.size());
+}
+
+//////////////////////////////////////////////////////////////////////
+
+} // namespace hash
+
+template <class Key>
+struct hasher;
+
+template<> struct hasher<int32_t> {
+  size_t operator()(int32_t key) const {
+    return hash::jenkins_rev_mix32(uint32_t(key));
+  }
+};
+
+template<> struct hasher<uint32_t> {
+  size_t operator()(uint32_t key) const {
+    return hash::jenkins_rev_mix32(key);
+  }
+};
+
+template<> struct hasher<int64_t> {
+  size_t operator()(int64_t key) const {
+    return hash::twang_mix64(uint64_t(key));
+  }
+};
+
+template<> struct hasher<uint64_t> {
+  size_t operator()(uint64_t key) const {
+    return hash::twang_mix64(key);
+  }
+};
+
+} // namespace folly
+
+#endif
diff --git a/folly/Histogram-inl.h b/folly/Histogram-inl.h
new file mode 100644
index 00000000..97d2bffd
--- /dev/null
+++ b/folly/Histogram-inl.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_HISTOGRAM_INL_H_
+#define FOLLY_HISTOGRAM_INL_H_
+
+#include "folly/Conv.h"
+
+#include <glog/logging.h>
+
+namespace folly {
+
+namespace detail {
+
+template <typename T, typename BucketT>
+HistogramBuckets<T, BucketT>::HistogramBuckets(ValueType bucketSize,
+                                               ValueType min,
+                                               ValueType max,
+                                               const BucketType& defaultBucket)
+  : bucketSize_(bucketSize),
+    min_(min),
+    max_(max) {
+  CHECK_GT(bucketSize_, ValueType(0));
+  CHECK_LT(min_, max_);
+
+  unsigned int numBuckets = (max - min) / bucketSize;
+  // Round up if the bucket size does not fit evenly
+  if (numBuckets * bucketSize < max - min) {
+    ++numBuckets;
+  }
+  // Add 2 for the extra 'below min' and 'above max' buckets
+  numBuckets += 2;
+  buckets_.assign(numBuckets, defaultBucket);
+}
+
+template <typename T, typename BucketT>
+unsigned int HistogramBuckets<T, BucketT>::getBucketIdx(
+    ValueType value) const {
+  if (value < min_) {
+    return 0;
+  } else if (value >= max_) {
+    return buckets_.size() - 1;
+  } else {
+    // the 1 is the below_min bucket
+    return ((value - min_) / bucketSize_) + 1;
+  }
+}
+
+template <typename T, typename BucketT>
+template <typename CountFn>
+unsigned int HistogramBuckets<T, BucketT>::getPercentileBucketIdx(
+    double pct,
+    CountFn countFromBucket,
+    double* lowPct, double* highPct) const {
+  CHECK_GE(pct, 0.0);
+  CHECK_LE(pct, 1.0);
+
+  unsigned int numBuckets = buckets_.size();
+
+  // Compute the counts in each bucket
+  std::vector<uint64_t> counts(numBuckets);
+  uint64_t totalCount = 0;
+  for (unsigned int n = 0; n < numBuckets; ++n) {
+    uint64_t bucketCount =
+      countFromBucket(const_cast<const BucketType&>(buckets_[n]));
+    counts[n] = bucketCount;
+    totalCount += bucketCount;
+  }
+
+  // If there are no elements, just return the lowest bucket.
+  // Note that we return bucket 1, which is the first bucket in the
+  // histogram range; bucket 0 is for all values below min_.
+  if (totalCount == 0) {
+    // Set lowPct and highPct both to 0.
+ // getPercentileEstimate() will recognize this to mean that the histogram + // is empty. + if (lowPct) { + *lowPct = 0.0; + } + if (highPct) { + *highPct = 0.0; + } + return 1; + } + + // Loop through all the buckets, keeping track of each bucket's + // percentile range: [0,10], [10,17], [17,45], etc. When we find a range + // that includes our desired percentile, we return that bucket index. + double prevPct = 0.0; + double curPct = 0.0; + uint64_t curCount = 0; + unsigned int idx; + for (idx = 0; idx < numBuckets; ++idx) { + if (counts[idx] == 0) { + // skip empty buckets + continue; + } + + prevPct = curPct; + curCount += counts[idx]; + curPct = static_cast(curCount) / totalCount; + if (pct <= curPct) { + // This is the desired bucket + break; + } + } + + if (lowPct) { + *lowPct = prevPct; + } + if (highPct) { + *highPct = curPct; + } + return idx; +} + +template +template +T HistogramBuckets::getPercentileEstimate( + double pct, + CountFn countFromBucket, + AvgFn avgFromBucket) const { + + // Find the bucket where this percentile falls + double lowPct; + double highPct; + unsigned int bucketIdx = getPercentileBucketIdx(pct, countFromBucket, + &lowPct, &highPct); + if (lowPct == 0.0 && highPct == 0.0) { + // Invalid range -- the buckets must all be empty + // Return the default value for ValueType. + return ValueType(); + } + if (lowPct == highPct) { + // Unlikely to have exact equality, + // but just return the bucket average in this case. + // We handle this here to avoid division by 0 below. + return avgFromBucket(buckets_[bucketIdx]); + } + + CHECK_GE(pct, lowPct); + CHECK_LE(pct, highPct); + CHECK_LT(lowPct, highPct); + + // Compute information about this bucket + ValueType avg = avgFromBucket(buckets_[bucketIdx]); + ValueType low; + ValueType high; + if (bucketIdx == 0) { + if (avg > min_) { + // This normally shouldn't happen. This bucket is only supposed to track + // values less than min_. Most likely this means that integer overflow + // occurred, and the code in avgFromBucket() returned a huge value + // instead of a small one. Just return the minimum possible value for + // now. + // + // (Note that if the counter keeps being decremented, eventually it will + // wrap and become small enough that we won't detect this any more, and + // we will return bogus information.) + LOG(ERROR) << "invalid average value in histogram minimum bucket: " << + avg << " > " << min_ << ": possible integer overflow?"; + return getBucketMin(bucketIdx); + } + // For the below-min bucket, just assume the lowest value ever seen is + // twice as far away from min_ as avg. + high = min_; + low = high - (2 * (high - avg)); + // Adjust low in case it wrapped + if (low > avg) { + low = std::numeric_limits::min(); + } + } else if (bucketIdx == buckets_.size() - 1) { + if (avg < max_) { + // Most likely this means integer overflow occurred. See the comments + // above in the minimum case. + LOG(ERROR) << "invalid average value in histogram maximum bucket: " << + avg << " < " << max_ << ": possible integer overflow?"; + return getBucketMax(bucketIdx); + } + // Similarly for the above-max bucket, assume the highest value ever seen + // is twice as far away from max_ as avg. + low = max_; + high = low + (2 * (avg - low)); + // Adjust high in case it wrapped + if (high < avg) { + high = std::numeric_limits::max(); + } + } else { + low = getBucketMin(bucketIdx); + high = getBucketMax(bucketIdx); + if (avg < low || avg > high) { + // Most likely this means an integer overflow occurred. 
+ // See the comments above. Return the midpoint between low and high + // as a best guess, since avg is meaningless. + LOG(ERROR) << "invalid average value in histogram bucket: " << + avg << " not in range [" << low << ", " << high << + "]: possible integer overflow?"; + return (low + high) / 2; + } + } + + // Since we know the average value in this bucket, we can do slightly better + // than just assuming the data points in this bucket are uniformly + // distributed between low and high. + // + // Assume that the median value in this bucket is the same as the average + // value. + double medianPct = (lowPct + highPct) / 2.0; + if (pct < medianPct) { + // Assume that the data points lower than the median of this bucket + // are uniformly distributed between low and avg + double pctThroughSection = (pct - lowPct) / (medianPct - lowPct); + return low + ((avg - low) * pctThroughSection); + } else { + // Assume that the data points greater than the median of this bucket + // are uniformly distributed between avg and high + double pctThroughSection = (pct - medianPct) / (highPct - medianPct); + return avg + ((high - avg) * pctThroughSection); + } +} + +} // detail + + +template +std::string Histogram::debugString() const { + std::string ret = folly::to( + "num buckets: ", buckets_.getNumBuckets(), + ", bucketSize: ", buckets_.getBucketSize(), + ", min: ", buckets_.getMin(), ", max: ", buckets_.getMax(), "\n"); + + for (unsigned int n = 0; n < buckets_.getNumBuckets(); ++n) { + folly::toAppend(" ", buckets_.getBucketMin(n), ": ", + buckets_.getByIndex(n).count, "\n", + &ret); + } + + return ret; +} + +} // folly + +#endif // FOLLY_HISTOGRAM_INL_H_ diff --git a/folly/Histogram.h b/folly/Histogram.h new file mode 100644 index 00000000..6b7cbcdd --- /dev/null +++ b/folly/Histogram.h @@ -0,0 +1,381 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_HISTOGRAM_H_ +#define FOLLY_HISTOGRAM_H_ + +#include +#include +#include +#include +#include + +namespace folly { + +namespace detail { + +/* + * A helper class to manage a set of histogram buckets. + */ +template +class HistogramBuckets { + public: + typedef T ValueType; + typedef BucketT BucketType; + + /* + * Create a set of histogram buckets. + * + * One bucket will be created for each bucketSize interval of values within + * the specified range. Additionally, one bucket will be created to track + * all values that fall below the specified minimum, and one bucket will be + * created for all values above the specified maximum. + * + * If (max - min) is not a multiple of bucketSize, the last bucket will cover + * a smaller range of values than the other buckets. + * + * (max - min) must be larger than or equal to bucketSize. + */ + HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max, + const BucketType& defaultBucket); + + /* Returns the bucket size of each bucket in the histogram. 
*/ + ValueType getBucketSize() const { + return bucketSize_; + } + + /* Returns the min value at which bucketing begins. */ + ValueType getMin() const { + return min_; + } + + /* Returns the max value at which bucketing ends. */ + ValueType getMax() const { + return max_; + } + + /* + * Returns the number of buckets. + * + * This includes the total number of buckets for the [min, max) range, + * plus 2 extra buckets, one for handling values less than min, and one for + * values greater than max. + */ + unsigned int getNumBuckets() const { + return buckets_.size(); + } + + /* Returns the bucket index into which the given value would fall. */ + unsigned int getBucketIdx(ValueType value) const; + + /* Returns the bucket for the specified value */ + BucketType& getByValue(ValueType value) { + return buckets_[getBucketIdx(value)]; + } + + /* Returns the bucket for the specified value */ + const BucketType& getByValue(ValueType value) const { + return buckets_[getBucketIdx(value)]; + } + + /* + * Returns the bucket at the specified index. + * + * Note that index 0 is the bucket for all values less than the specified + * minimum. Index 1 is the first bucket in the specified bucket range. + */ + BucketType& getByIndex(unsigned int idx) { + return buckets_[idx]; + } + + /* Returns the bucket at the specified index. */ + const BucketType& getByIndex(unsigned int idx) const { + return buckets_[idx]; + } + + /* + * Returns the minimum threshold for the bucket at the given index. + * + * The bucket at the specified index will store values in the range + * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall + * max is smaller than bucketMin + bucketSize. + */ + ValueType getBucketMin(unsigned int idx) const { + if (idx == 0) { + return std::numeric_limits::min(); + } + if (idx == buckets_.size() - 1) { + return max_; + } + + return min_ + ((idx - 1) * bucketSize_); + } + + /* + * Returns the maximum threshold for the bucket at the given index. + * + * The bucket at the specified index will store values in the range + * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall + * max is smaller than bucketMin + bucketSize. + */ + ValueType getBucketMax(unsigned int idx) const { + if (idx == buckets_.size() - 1) { + return std::numeric_limits::max(); + } + + return min_ + (idx * bucketSize_); + } + + /** + * Determine which bucket the specified percentile falls into. + * + * Looks for the bucket that contains the Nth percentile data point. + * + * @param pct The desired percentile to find, as a value from 0.0 to 1.0. + * @param countFn A function that takes a const BucketType&, and returns the + * number of values in that bucket. + * @param lowPct The lowest percentile stored in the selected bucket will be + * returned via this parameter. + * @param highPct The highest percentile stored in the selected bucket will + * be returned via this parameter. + * + * @return Returns the index of the bucket that contains the Nth percentile + * data point. + */ + template + unsigned int getPercentileBucketIdx(double pct, + CountFn countFromBucket, + double* lowPct = NULL, + double* highPct = NULL) const; + + /** + * Estimate the value at the specified percentile. + * + * @param pct The desired percentile to find, as a value from 0.0 to 1.0. + * @param countFn A function that takes a const BucketType&, and returns the + * number of values in that bucket. + * @param avgFn A function that takes a const BucketType&, and returns the + * average of all the values in that bucket. 
+ * + * @return Returns an estimate for N, where N is the number where exactly pct + * percentage of the data points in the histogram are less than N. + */ + template + ValueType getPercentileEstimate(double pct, + CountFn countFromBucket, + AvgFn avgFromBucket) const; + + /* + * Iterator access to the buckets. + * + * Note that the first bucket is for all values less than min, and the last + * bucket is for all values greater than max. The buckets tracking values in + * the [min, max) actually start at the second bucket. + */ + typename std::vector::const_iterator begin() const { + return buckets_.begin(); + } + typename std::vector::iterator begin() { + return buckets_.begin(); + } + typename std::vector::const_iterator end() const { + return buckets_.end(); + } + typename std::vector::iterator end() { + return buckets_.end(); + } + + private: + const ValueType bucketSize_; + const ValueType min_; + const ValueType max_; + std::vector buckets_; +}; + +} // detail + + +/* + * A basic histogram class. + * + * Groups data points into equally-sized buckets, and stores the overall sum of + * the data points in each bucket, as well as the number of data points in the + * bucket. + * + * The caller must specify the minimum and maximum data points to expect ahead + * of time, as well as the bucket width. + */ +template +class Histogram { + public: + typedef T ValueType; + + struct Bucket { + Bucket() + : sum(0), + count(0) {} + + void clear() { + sum = 0; + count = 0; + } + + ValueType sum; + uint64_t count; + }; + + Histogram(ValueType bucketSize, ValueType min, ValueType max) + : buckets_(bucketSize, min, max, Bucket()) {} + + /* Add a data point to the histogram */ + void addValue(ValueType value) { + Bucket& bucket = buckets_.getByValue(value); + // TODO: It would be nice to handle overflow here. + bucket.sum += value; + bucket.count += 1; + } + + /* + * Remove a data point to the histogram + * + * Note that this method does not actually verify that this exact data point + * had previously been added to the histogram; it merely subtracts the + * requested value from the appropriate bucket's sum. + */ + void removeValue(ValueType value) { + Bucket& bucket = buckets_.getByValue(value); + // TODO: It would be nice to handle overflow here. + bucket.sum -= value; + bucket.count -= 1; + } + + /* Remove all data points from the histogram */ + void clear() { + for (int i = 0; i < buckets_.getNumBuckets(); i++) { + buckets_.getByIndex(i).clear(); + } + } + + /* Returns the bucket size of each bucket in the histogram. */ + ValueType getBucketSize() const { + return buckets_.getBucketSize(); + } + /* Returns the min value at which bucketing begins. */ + ValueType getMin() const { + return buckets_.getMin(); + } + /* Returns the max value at which bucketing ends. */ + ValueType getMax() const { + return buckets_.getMax(); + } + /* Returns the number of buckets */ + unsigned int getNumBuckets() const { + return buckets_.getNumBuckets(); + } + + /* Returns the specified bucket (for reading only!) */ + const Bucket& getBucketByIndex(int idx) const { + return buckets_.getByIndex(idx); + } + + /* + * Returns the minimum threshold for the bucket at the given index. + * + * The bucket at the specified index will store values in the range + * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall + * max is smaller than bucketMin + bucketSize. 
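+ *
+ * (For example, with bucketSize 10, min 0, and max 100:
+ * getBucketMin(0) is the smallest possible ValueType, and
+ * getBucketMin(1) == 0, getBucketMin(2) == 10, and so on.)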
+ */ + ValueType getBucketMin(unsigned int idx) const { + return buckets_.getBucketMin(idx); + } + + /* + * Returns the maximum threshold for the bucket at the given index. + * + * The bucket at the specified index will store values in the range + * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall + * max is smaller than bucketMin + bucketSize. + */ + ValueType getBucketMax(unsigned int idx) const { + return buckets_.getBucketMax(idx); + } + + /* + * Get the bucket that the specified percentile falls into + * + * The lowest and highest percentile data points in returned bucket will be + * returned in the lowPct and highPct arguments, if they are non-NULL. + */ + unsigned int getPercentileBucketIdx(double pct, + double* lowPct = NULL, + double* highPct = NULL) const { + // We unfortunately can't use lambdas here yet; + // Some users of this code are still built with gcc-4.4. + CountFromBucket countFn; + return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct); + } + + /** + * Estimate the value at the specified percentile. + * + * @param pct The desired percentile to find, as a value from 0.0 to 1.0. + * + * @return Returns an estimate for N, where N is the number where exactly pct + * percentage of the data points in the histogram are less than N. + */ + ValueType getPercentileEstimate(double pct) const { + CountFromBucket countFn; + AvgFromBucket avgFn; + return buckets_.getPercentileEstimate(pct, countFn, avgFn); + } + + /* + * Get a human-readable string describing the histogram contents + */ + std::string debugString() const; + + private: + struct CountFromBucket { + uint64_t operator()(const Bucket& bucket) const { + return bucket.count; + } + }; + struct AvgFromBucket { + ValueType operator()(const Bucket& bucket) const { + if (bucket.count == 0) { + return ValueType(0); + } + // Cast bucket.count to a signed integer type. This ensures that we + // perform division properly here: If bucket.sum is a signed integer + // type but we divide by an unsigned number, unsigned division will be + // performed and bucket.sum will be converted to unsigned first. + // If bucket.sum is unsigned, the code will still do unsigned division + // correctly. + // + // The only downside is if bucket.count is large enough to be negative + // when treated as signed. That should be extremely unlikely, though. + return bucket.sum / static_cast(bucket.count); + } + }; + + detail::HistogramBuckets buckets_; +}; + +} // folly + +#include "folly/Histogram-inl.h" + +#endif // FOLLY_HISTOGRAM_H_ diff --git a/folly/IntrusiveList.h b/folly/IntrusiveList.h new file mode 100644 index 00000000..d9d3c602 --- /dev/null +++ b/folly/IntrusiveList.h @@ -0,0 +1,135 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_INTRUSIVELIST_H_ +#define FOLLY_INTRUSIVELIST_H_ + +/* + * This file contains convenience typedefs that make boost::intrusive::list + * easier to use. 
+ */ + +#include + +namespace folly { + +/** + * An auto-unlink intrusive list hook. + */ +typedef boost::intrusive::list_member_hook< + boost::intrusive::link_mode > + IntrusiveListHook; + +/** + * An intrusive list. + * + * An IntrusiveList always uses an auto-unlink hook. + * Beware that IntrusiveList::size() is an O(n) operation, since it has to walk + * the entire list. + * + * Example usage: + * + * class Foo { + * // Note that the listHook member variable needs to be visible + * // to the code that defines the IntrusiveList instantiation. + * // The list hook can be made public, or you can make the other class a + * // friend. + * IntrusiveListHook listHook; + * }; + * + * typedef IntrusiveList FooList; + * + * Foo *foo = new Foo(); + * FooList myList; + * myList.push_back(*foo); + * + * Note that each IntrusiveListHook can only be part of a single list at any + * given time. If you need the same object to be stored in two lists at once, + * you need to use two different IntrusiveListHook member variables. + * + * The elements stored in the list must contain an IntrusiveListHook member + * variable. + * + * TODO: This should really be a template alias. However, gcc doesn't support + * template aliases yet. A subclass is a reasonable workaround for now. This + * subclass only supports the default constructor, but we could add other + * constructors if necessary. + */ +template +class IntrusiveList : public boost::intrusive::list< + T, + boost::intrusive::member_hook, + boost::intrusive::constant_time_size > { +}; + +/** + * A safe-link intrusive list hook. + */ +typedef boost::intrusive::list_member_hook< + boost::intrusive::link_mode > + SafeIntrusiveListHook; + +/** + * An intrusive list with const-time size() method. + * + * A CountedIntrusiveList always uses a safe-link hook. + * CountedIntrusiveList::size() is an O(1) operation. Users of this type + * of lists need to remove a member from a list by calling one of the + * methods on the list (e.g., erase(), pop_front(), etc.), rather than + * calling unlink on the member's list hook. Given references to a + * list and a member, a constant-time removal operation can be + * accomplished by list.erase(list.iterator_to(member)). Also, when a + * member is destroyed, it is NOT automatically removed from the list. + * + * Example usage: + * + * class Foo { + * // Note that the listHook member variable needs to be visible + * // to the code that defines the CountedIntrusiveList instantiation. + * // The list hook can be made public, or you can make the other class a + * // friend. + * SafeIntrusiveListHook listHook; + * }; + * + * typedef CountedIntrusiveList FooList; + * + * Foo *foo = new Foo(); + * FooList myList; + * myList.push_back(*foo); + * myList.pop_front(); + * + * Note that each SafeIntrusiveListHook can only be part of a single list at any + * given time. If you need the same object to be stored in two lists at once, + * you need to use two different SafeIntrusiveListHook member variables. + * + * The elements stored in the list must contain an SafeIntrusiveListHook member + * variable. + * + * TODO: This should really be a template alias. However, gcc doesn't support + * template aliases yet. A subclass is a reasonable workaround for now. This + * subclass only supports the default constructor, but we could add other + * constructors if necessary. 
+ */ +template +class CountedIntrusiveList : public boost::intrusive::list< + T, + boost::intrusive::member_hook, + boost::intrusive::constant_time_size > { +}; + +} // folly + +#endif // FOLLY_INTRUSIVELIST_H_ diff --git a/folly/Likely.h b/folly/Likely.h new file mode 100644 index 00000000..c535e8ae --- /dev/null +++ b/folly/Likely.h @@ -0,0 +1,39 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Compiler hints to indicate the fast path of an "if" branch: whether + * the if condition is likely to be true or false. + * + * @author Tudor Bosman (tudorb@fb.com) + */ + +#ifndef FOLLY_BASE_LIKELY_H_ +#define FOLLY_BASE_LIKELY_H_ + +#undef LIKELY +#undef UNLIKELY + +#if defined(__GNUC__) && __GNUC__ >= 4 +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +#endif /* FOLLY_BASE_LIKELY_H_ */ + diff --git a/folly/Makefile.am b/folly/Makefile.am new file mode 100644 index 00000000..a6a1d593 --- /dev/null +++ b/folly/Makefile.am @@ -0,0 +1,86 @@ +SUBDIRS = . test + +ACLOCAL_AMFLAGS = -I m4 + +lib_LTLIBRARIES = \ + libfolly.la \ + libfollybenchmark.la \ + libfollytimeout_queue.la + +follyincludedir = $(includedir)/folly + +nobase_follyinclude_HEADERS = \ + FBVector.h \ + detail/ThreadLocalDetail.h \ + detail/DiscriminatedPtrDetail.h \ + detail/AtomicHashUtils.h \ + detail/BitIteratorDetail.h \ + detail/GroupVarintDetail.h \ + IntrusiveList.h \ + TimeoutQueue.h \ + String.h \ + PackedSyncPtr.h \ + Conv.h \ + ThreadLocal.h \ + ProducerConsumerQueue.h \ + Histogram-inl.h \ + ThreadCachedInt.h \ + ConcurrentSkipList.h \ + json.h \ + folly-config.h \ + FBString.h \ + Unicode.h \ + test/function_benchmark/test_functions.h \ + test/function_benchmark/benchmark_impl.h \ + test/FBStringTestBenchmarks.cpp.h \ + test/SynchronizedTestLib.h \ + test/FBVectorTestBenchmarks.cpp.h \ + test/SynchronizedTestLib-inl.h \ + Synchronized.h \ + Malloc.h \ + dynamic.h \ + AtomicHashArray.h \ + dynamic-inl.h \ + Bits.h \ + sorted_vector_types.h \ + Hash.h \ + DiscriminatedPtr.h \ + ConcurrentSkipList-inl.h \ + Random.h \ + GroupVarint.h \ + Range.h \ + Benchmark.h \ + Likely.h \ + Histogram.h \ + AtomicHashMap.h \ + Portability.h \ + AtomicHashArray-inl.h \ + eventfd.h \ + SmallLocks.h \ + ScopeGuard.h \ + Traits.h \ + RWSpinLock.h \ + small_vector.h \ + Foreach.h \ + AtomicHashMap-inl.h \ + MapUtil.h + +FormatTables.cpp: build/generate_format_tables.py + build/generate_format_tables.py + +libfolly_la_SOURCES = \ + Random.cpp \ + Range.cpp \ + Unicode.cpp \ + Conv.cpp \ + Format.cpp \ + FormatTables.cpp \ + String.cpp \ + json.cpp \ + dynamic.cpp \ +libfolly_la_LIBADD = $(BOOST_THREAD_LIBS) -lpthread + +libfollybenchmark_la_SOURCES = Benchmark.cpp +libfollybenchmark_la_LIBADD = -lboost_regex -lpthread -lrt + +libfollytimeout_queue_la_SOURCES = TimeoutQueue.cpp diff --git a/folly/Malloc.h b/folly/Malloc.h new file mode 100644 index 00000000..52c4a30a --- 
/dev/null +++ b/folly/Malloc.h @@ -0,0 +1,192 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Functions to provide smarter use of jemalloc, if jemalloc is being used. +// http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html + +#ifndef FOLLY_MALLOC_H_ +#define FOLLY_MALLOC_H_ + +// If using fbstring from libstdc++, then just define stub code +// here to typedef the fbstring type into the folly namespace. +// This provides backwards compatibility for code that explicitly +// includes and uses fbstring. +#if defined(_GLIBCXX_USE_FB) && !defined(_LIBSTDCXX_FBSTRING) + +#include +namespace folly { + using std::goodMallocSize; + using std::jemallocMinInPlaceExpandable; + using std::usingJEMalloc; + using std::smartRealloc; +} + +#else // !defined(_GLIBCXX_USE_FB) || defined(_LIBSTDCXX_FBSTRING) + +#ifdef _LIBSTDCXX_FBSTRING +#pragma GCC system_header +#define FOLLY_HAVE_MALLOC_H 1 +#else +#include "folly-config.h" +#endif + +// for malloc_usable_size +// NOTE: FreeBSD 9 doesn't have malloc.h. It's defitions +// are found in stdlib.h. +#ifdef FOLLY_HAVE_MALLOC_H +#include +#else +#include +#endif + +#include +#include +#include +#include + +/** + * Declare rallocm() and malloc_usable_size() as weak symbols. It + * will be provided by jemalloc if we are using jemalloc, or it will + * be NULL if we are using another malloc implementation. + */ +extern "C" int rallocm(void**, size_t*, size_t, size_t, int) +__attribute__((weak)); + +/** + * Define the ALLOCM_SUCCESS, ALLOCM_ZERO, and ALLOCM_NO_MOVE constants + * normally provided by jemalloc. We define them so that we don't have to + * include jemalloc.h, in case the program is built without jemalloc support. + */ +#ifndef ALLOCM_SUCCESS +#define ALLOCM_SUCCESS 0 +#define ALLOCM_ERR_OOM 1 +#define ALLOCM_ERR_NOT_MOVED 2 + +#define ALLOCM_ZERO 64 +#define ALLOCM_NO_MOVE 128 +#endif + +#ifdef _LIBSTDCXX_FBSTRING +namespace std _GLIBCXX_VISIBILITY(default) { +_GLIBCXX_BEGIN_NAMESPACE_VERSION +#else +namespace folly { +#endif + + +/** + * Determine if we are using jemalloc or not. + */ +inline bool usingJEMalloc() { + return rallocm != NULL; +} + +/** + * For jemalloc's size classes, see + * http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html + */ +inline size_t goodMallocSize(size_t minSize) { + if (!usingJEMalloc()) { + // Not using jemalloc - no smarts + return minSize; + } + if (minSize <= 64) { + // Choose smallest allocation to be 64 bytes - no tripping over + // cache line boundaries, and small string optimization takes care + // of short strings anyway. + return 64; + } + if (minSize <= 512) { + // Round up to the next multiple of 64; we don't want to trip over + // cache line boundaries. 
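+    // (Rounding up to a power-of-two multiple M is the usual mask
+    // trick (n + M - 1) & ~(M - 1); e.g. minSize = 100 becomes
+    // (100 + 63) & ~63 = 128.)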
+ return (minSize + 63) & ~size_t(63); + } + if (minSize <= 3840) { + // Round up to the next multiple of 256 + return (minSize + 255) & ~size_t(255); + } + if (minSize <= 4072 * 1024) { + // Round up to the next multiple of 4KB + return (minSize + 4095) & ~size_t(4095); + } + // Holy Moly + // Round up to the next multiple of 4MB + return (minSize + 4194303) & ~size_t(4194303); +} + +// We always request "good" sizes for allocation, so jemalloc can +// never grow in place small blocks; they're already occupied to the +// brim. Blocks larger than or equal to 4096 bytes can in fact be +// expanded in place, and this constant reflects that. +static const size_t jemallocMinInPlaceExpandable = 4096; + +/** + * This function tries to reallocate a buffer of which only the first + * currentSize bytes are used. The problem with using realloc is that + * if currentSize is relatively small _and_ if realloc decides it + * needs to move the memory chunk to a new buffer, then realloc ends + * up copying data that is not used. It's impossible to hook into + * GNU's malloc to figure whether expansion will occur in-place or as + * a malloc-copy-free troika. (If an expand_in_place primitive would + * be available, smartRealloc would use it.) As things stand, this + * routine just tries to call realloc() (thus benefitting of potential + * copy-free coalescing) unless there's too much slack memory. + */ +inline void* smartRealloc(void* p, + const size_t currentSize, + const size_t currentCapacity, + const size_t newCapacity) { + assert(p); + assert(currentSize <= currentCapacity && + currentCapacity < newCapacity); + + if (usingJEMalloc()) { + // using jemalloc's API. Don't forget that jemalloc can never grow + // in place blocks smaller than 4096 bytes. + if (currentCapacity >= jemallocMinInPlaceExpandable && + rallocm(&p, NULL, newCapacity, 0, ALLOCM_NO_MOVE) == ALLOCM_SUCCESS) { + // Managed to expand in place + return p; + } + // Cannot expand; must move + auto const result = malloc(newCapacity); + std::memcpy(result, p, currentSize); + free(p); + return result; + } + + // No jemalloc no honey + auto const slack = currentCapacity - currentSize; + if (slack * 2 > currentSize) { + // Too much slack, malloc-copy-free cycle: + auto const result = malloc(newCapacity); + std::memcpy(result, p, currentSize); + free(p); + return result; + } + // If there's not too much slack, we realloc in hope of coalescing + return realloc(p, newCapacity); +} + +#ifdef _LIBSTDCXX_FBSTRING +_GLIBCXX_END_NAMESPACE_VERSION +#endif + +} // folly + +#endif // !defined(_GLIBCXX_USE_FB) || defined(_LIBSTDCXX_FBSTRING) + +#endif // FOLLY_MALLOC_H_ diff --git a/folly/MapUtil.h b/folly/MapUtil.h new file mode 100644 index 00000000..b4e6dd05 --- /dev/null +++ b/folly/MapUtil.h @@ -0,0 +1,72 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef FOLLY_MAPUTIL_H_
+#define FOLLY_MAPUTIL_H_
+
+namespace folly {
+
+/**
+ * Given a map and a key, return the value corresponding to the key in the map,
+ * or a given default value if the key doesn't exist in the map.
+ */
+template <class Map>
+typename Map::mapped_type get_default(
+    const Map& map, const typename Map::key_type& key,
+    const typename Map::mapped_type& dflt =
+    typename Map::mapped_type()) {
+  auto pos = map.find(key);
+  return (pos != map.end() ? pos->second : dflt);
+}
+
+/**
+ * Given a map and a key, return a reference to the value corresponding to the
+ * key in the map, or the given default reference if the key doesn't exist in
+ * the map.
+ */
+template <class Map>
+const typename Map::mapped_type& get_ref_default(
+    const Map& map, const typename Map::key_type& key,
+    const typename Map::mapped_type& dflt) {
+  auto pos = map.find(key);
+  return (pos != map.end() ? pos->second : dflt);
+}
+
+/**
+ * Given a map and a key, return a pointer to the value corresponding to the
+ * key in the map, or nullptr if the key doesn't exist in the map.
+ */
+template <class Map>
+const typename Map::mapped_type* get_ptr(
+    const Map& map, const typename Map::key_type& key) {
+  auto pos = map.find(key);
+  return (pos != map.end() ? &pos->second : nullptr);
+}
+
+/**
+ * Non-const overload of the above.
+ */
+template <class Map>
+typename Map::mapped_type* get_ptr(
+    Map& map, const typename Map::key_type& key) {
+  auto pos = map.find(key);
+  return (pos != map.end() ? &pos->second : nullptr);
+}
+
+} // namespace folly
+
+#endif /* FOLLY_MAPUTIL_H_ */
+
diff --git a/folly/PackedSyncPtr.h b/folly/PackedSyncPtr.h
new file mode 100644
index 00000000..6e8cca53
--- /dev/null
+++ b/folly/PackedSyncPtr.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_PACKEDSYNCPTR_H_
+#define FOLLY_PACKEDSYNCPTR_H_
+
+#ifndef __x86_64__
+# error "PackedSyncPtr is x64-specific code."
+#endif
+
+/*
+ * An 8-byte pointer with an integrated spin lock and 15-bit integer
+ * (you can use this for a size of the allocation, if you want, or
+ * something else, or nothing).
+ *
+ * This is using an x64-specific detail about the effective virtual
+ * address space.  Long story short: the upper two bytes of all our
+ * pointers will be zero in reality---and if you have a couple billion
+ * such pointers in core, it makes pretty good sense to try to make
+ * use of that memory.  The exact details can be perused here:
+ *
+ *   http://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses
+ *
+ * This is not a "smart" pointer: nothing automagical is going on
+ * here.  Locking is up to the user.  Resource deallocation is up to
+ * the user.  Locks are never acquired or released outside explicit
+ * calls to lock() and unlock().
+ *
+ * Change the value of the raw pointer with set(), but you must hold
+ * the lock when calling this function if multiple threads could be
+ * using this class.
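+ *
+ * A minimal usage sketch (illustrative only; Node and otherNode are
+ * made-up stand-ins):
+ *
+ *   PackedSyncPtr<Node> p;
+ *   p.init(new Node, 1);       // pointer plus a 15-bit extra value
+ *   p.lock();
+ *   p.setExtra(p.extra() + 1);
+ *   p.set(otherNode);          // set() requires holding the lock
+ *   p.unlock();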
+ * + * TODO(jdelong): should we use the low order bit for the lock, so we + * get a whole 16-bits for our integer? (There's also 2 more bits + * down there if the pointer comes from malloc.) + * + * @author Spencer Ahrens + * @author Jordan DeLong + */ + +#include "folly/SmallLocks.h" +#include +#include + +namespace folly { + +template +class PackedSyncPtr { + // This just allows using this class even with T=void. Attempting + // to use the operator* or operator[] on a PackedSyncPtr will + // still properly result in a compile error. + typedef typename std::add_lvalue_reference::type reference; + +public: + /* + * If you default construct one of these, you must call this init() + * function before using it. + * + * (We are avoiding a constructor to ensure gcc allows us to put + * this class in packed structures.) + */ + void init(T* initialPtr = 0, uint16_t initialExtra = 0) { + auto intPtr = reinterpret_cast(initialPtr); + CHECK(!(intPtr >> 48)); + data_.init(intPtr); + setExtra(initialExtra); + } + + /* + * Sets a new pointer. You must hold the lock when calling this + * function, or else be able to guarantee no other threads could be + * using this PackedSyncPtr<>. + */ + void set(T* t) { + auto intPtr = reinterpret_cast(t); + auto shiftedExtra = uintptr_t(extra()) << 48; + CHECK(!(intPtr >> 48)); + data_.setData(intPtr | shiftedExtra); + } + + /* + * Get the pointer. + * + * You can call any of these without holding the lock, with the + * normal types of behavior you'll get on x64 from reading a pointer + * without locking. + */ + T* get() const { + return reinterpret_cast(data_.getData() & (-1ull >> 16)); + } + T* operator->() const { return get(); } + reference operator*() const { return *get(); } + reference operator[](std::ptrdiff_t i) const { return get()[i]; } + + // Syncronization (logically const, even though this mutates our + // locked state: you can lock a const PackedSyncPtr to read it). + void lock() const { data_.lock(); } + void unlock() const { data_.unlock(); } + bool try_lock() const { return data_.try_lock(); } + + /* + * Access extra data stored in unused bytes of the pointer. + * + * It is ok to call this without holding the lock. + */ + uint16_t extra() const { + return data_.getData() >> 48; + } + + /* + * Don't try to put anything into this that has the high bit set: + * that's what we're using for the mutex. + * + * Don't call this without holding the lock. + */ + void setExtra(uint16_t extra) { + CHECK(!(extra & 0x8000)); + auto ptr = data_.getData() & (-1ull >> 16); + data_.setData((uintptr_t(extra) << 48) | ptr); + } + + // Logically private, but we can't have private data members and + // still be considered a POD. (In C++11 we are still a standard + // layout struct if this is private, but it doesn't matter, since + // gcc (4.6) won't let us use this with attribute packed still in + // that case.) + PicoSpinLock data_; +}; + +static_assert(sizeof(PackedSyncPtr) == 8, + "PackedSyncPtr should be only 8 bytes---something is " + "messed up"); + +} + +#endif + diff --git a/folly/Portability.h b/folly/Portability.h new file mode 100644 index 00000000..44f352d6 --- /dev/null +++ b/folly/Portability.h @@ -0,0 +1,30 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_PORTABILITY_H_ +#define FOLLY_PORTABILITY_H_ + +#include "folly-config.h" + +#ifdef FOLLY_HAVE_SCHED_H + #include + #ifndef FOLLY_HAVE_PTHREAD_YIELD + #define pthread_yield sched_yield + #endif +#endif + + +#endif // FOLLY_PORTABILITY_H_ diff --git a/folly/Preprocessor.h b/folly/Preprocessor.h new file mode 100644 index 00000000..d0ff3403 --- /dev/null +++ b/folly/Preprocessor.h @@ -0,0 +1,70 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// @author: Andrei Alexandrescu + +#ifndef FOLLY_PREPROCESSOR_ +#define FOLLY_PREPROCESSOR_ + +/** + * Necessarily evil preprocessor-related amenities. + */ + +/** + * FB_ONE_OR_NONE(hello, world) expands to hello and + * FB_ONE_OR_NONE(hello) expands to nothing. This macro is used to + * insert or eliminate text based on the presence of another argument. + */ +#define FB_ONE_OR_NONE(a, ...) FB_THIRD(a, ## __VA_ARGS__, a) +#define FB_THIRD(a, b, ...) __VA_ARGS__ + +/** + * Helper macro that extracts the first argument out of a list of any + * number of arguments. + */ +#define FB_ARG_1(a, ...) a + +/** + * Helper macro that extracts the second argument out of a list of any + * number of arguments. If only one argument is given, it returns + * that. + */ +#define FB_ARG_2_OR_1(...) FB_ARG_2_OR_1_IMPL(__VA_ARGS__, __VA_ARGS__) +// Support macro for the above +#define FB_ARG_2_OR_1_IMPL(a, b, ...) b + +/** + * FB_ANONYMOUS_VARIABLE(str) introduces an identifier starting with + * str and ending with a number that varies with the line. + */ +#ifndef FB_ANONYMOUS_VARIABLE +#define FB_CONCATENATE_IMPL(s1, s2) s1##s2 +#define FB_CONCATENATE(s1, s2) FB_CONCATENATE_IMPL(s1, s2) +#ifdef __COUNTER__ +#define FB_ANONYMOUS_VARIABLE(str) FB_CONCATENATE(str, __COUNTER__) +#else +#define FB_ANONYMOUS_VARIABLE(str) FB_CONCATENATE(str, __LINE__) +#endif +#endif + +/** + * Use FB_STRINGIZE(name) when you'd want to do what #name does inside + * another macro expansion. + */ +#define FB_STRINGIZE(name) #name + + +#endif // FOLLY_PREPROCESSOR_ diff --git a/folly/ProducerConsumerQueue.h b/folly/ProducerConsumerQueue.h new file mode 100644 index 00000000..6a545dea --- /dev/null +++ b/folly/ProducerConsumerQueue.h @@ -0,0 +1,128 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// @author Bo Hu (bhu@fb.com)
+// @author Jordan DeLong (delong.j@fb.com)
+
+#ifndef PRODUCER_CONSUMER_QUEUE_H_
+#define PRODUCER_CONSUMER_QUEUE_H_
+
+#include <atomic>
+#include <cassert>
+#include <cstdlib>
+#include <stdexcept>
+#include <type_traits>
+#include <utility>
+#include <boost/noncopyable.hpp>
+
+namespace folly {
+
+/*
+ * ProducerConsumerQueue is a one producer and one consumer queue
+ * without locks.
+ */
+template<class T>
+struct ProducerConsumerQueue : private boost::noncopyable {
+  typedef T value_type;
+
+  // size must be >= 2
+  explicit ProducerConsumerQueue(uint32_t size)
+    : size_(size)
+    , records_(static_cast<T*>(std::malloc(sizeof(T) * size)))
+    , readIndex_(0)
+    , writeIndex_(0)
+  {
+    assert(size >= 2);
+    if (!records_) {
+      throw std::bad_alloc();
+    }
+  }
+
+  ~ProducerConsumerQueue() {
+    // We need to destruct anything that may still exist in our queue.
+    // (No real synchronization needed at destructor time: only one
+    // thread can be doing this.)
+    if (!std::has_trivial_destructor<T>::value) {
+      int read = readIndex_;
+      int end = writeIndex_;
+      while (read != end) {
+        records_[read].~T();
+        if (++read == size_) {
+          read = 0;
+        }
+      }
+    }
+
+    std::free(records_);
+  }
+
+  template<class ...Args>
+  bool write(Args&&... recordArgs) {
+    auto const currentWrite = writeIndex_.load(std::memory_order_relaxed);
+    auto nextRecord = currentWrite + 1;
+    if (nextRecord == size_) {
+      nextRecord = 0;
+    }
+    if (nextRecord != readIndex_.load(std::memory_order_acquire)) {
+      new (&records_[currentWrite]) T(std::forward<Args>(recordArgs)...);
+      writeIndex_.store(nextRecord, std::memory_order_release);
+      return true;
+    }
+
+    // queue is full
+    return false;
+  }
+
+  bool read(T& record) {
+    auto const currentRead = readIndex_.load(std::memory_order_relaxed);
+    if (currentRead == writeIndex_.load(std::memory_order_acquire)) {
+      // queue is empty
+      return false;
+    }
+
+    auto nextRecord = currentRead + 1;
+    if (nextRecord == size_) {
+      nextRecord = 0;
+    }
+    record = std::move(records_[currentRead]);
+    records_[currentRead].~T();
+    readIndex_.store(nextRecord, std::memory_order_release);
+    return true;
+  }
+
+  bool isFull() const {
+    auto nextRecord = writeIndex_.load(std::memory_order_consume) + 1;
+    if (nextRecord == size_) {
+      nextRecord = 0;
+    }
+    if (nextRecord != readIndex_.load(std::memory_order_consume)) {
+      return false;
+    }
+    // queue is full
+    return true;
+  }
+
+private:
+  const uint32_t size_;
+  T* const records_;
+
+  std::atomic<int> readIndex_;
+  std::atomic<int> writeIndex_;
+};
+
+}
+
+#endif
diff --git a/folly/README b/folly/README
new file mode 100644
index 00000000..056d2d7a
--- /dev/null
+++ b/folly/README
@@ -0,0 +1,73 @@
+Folly: Facebook Open-source LibrarY
+-----------------------------------
+
+Folly is an open-source C++ library developed and used at Facebook.
+
+Note to Facebook contributors:
+* Code in folly may ONLY depend on other code from folly,
+  external libraries from /home/engshare/externals and from
+  fbcode/external (preferably only the latter, eventually all deps
+  will be moved to external)
+* Code in folly goes in namespace folly.
+* Tests should go in the test/ directory.
+* Please keep Makefile.am's up to date.  fbconfig should warn if
+  it thinks something is missing.
+* NO EXCEPTIONS.  This will eventually be enforced.
+* config.h is a generated file.  It is checked in so fbmake can run
+  without needing to run autotools.  To regenerate it, run
+    autoreconf
+    ./configure
+  You will probably also have to export the correct paths for your
+  environment (such as CC or CXX_INCLUDES) since we aren't using
+  the standard linux paths.
+
+Dependencies
+------------
+
+- double-conversion (http://code.google.com/p/double-conversion/)
+
+  By default, the build tooling for double-conversion does not build
+  any libraries, which folly requires.  To build the necessary libraries
+  copy folly/SConstruct.double-conversion to your double-conversion
+  source directory before building:
+
+    [double-conversion/] scons -f SConstruct.double-conversion
+
+  Then set CPPFLAGS/LDFLAGS so that folly can find your double-conversion
+  build:
+
+    [folly/] LDFLAGS=-L<double-conversion>/ CPPFLAGS=-I<double-conversion>/src/
+      configure ...
+
+- googletest (Google C++ Testing Framework)
+
+  Grab gtest 1.6.0 from:
+    http://googletest.googlecode.com/files/gtest-1.6.0.zip
+
+  Unzip it inside of the test/ subdirectory.
+
+- additional platform-specific dependencies:
+
+  Ubuntu 12.04 64-bit
+  - g++
+  - automake
+  - autoconf
+  - libtool
+  - libboost1.46-all-dev
+  - libgoogle-glog-dev
+    This package has been removed from 12.04 -- use the one from 11.10
+  - gflags (packages need to be downloaded from below)
+    http://gflags.googlecode.com/files/libgflags-dev_2.0-1_amd64.deb
+    http://gflags.googlecode.com/files/libgflags0_2.0-1_amd64.deb
+  - scons (for double-conversion)
+
+  Fedora 17 64-bit
+  - gcc
+  - gcc-c++
+  - autoconf
+  - automake
+  - boost-devel
+  - libtool
+  - glog-devel
+  - gflags-devel
+  - scons (for double-conversion)
diff --git a/folly/RWSpinLock.h b/folly/RWSpinLock.h
new file mode 100644
index 00000000..ec040756
--- /dev/null
+++ b/folly/RWSpinLock.h
@@ -0,0 +1,735 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Two Read-Write spin lock implementations.
+ *
+ * Ref: http://locklessinc.com/articles/locks
+ *
+ * Both locks here are faster than pthread_rwlock and have very low
+ * overhead (usually 20-30ns).  They don't use any system mutexes and
+ * are very compact (4/8 bytes), so are suitable for per-instance
+ * based locking, particularly when contention is not expected.
+ *
+ * In most cases, RWSpinLock is a reasonable choice.  It has minimal
+ * overhead, and comparable contention performance when the number of
+ * competing threads is less than or equal to the number of logical
+ * CPUs.  Even as the number of threads gets larger, RWSpinLock can
+ * still be very competitive in READ, although it is slower on WRITE,
+ * and also inherently unfair to writers.
+ *
+ * RWTicketSpinLock shows more balanced READ/WRITE performance.  If
+ * your application really needs a lot more threads, and a
+ * higher-priority writer, prefer one of the RWTicketSpinLock locks.
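+ *
+ * A rough usage sketch of the RAII holders defined further down in
+ * this file (illustrative only):
+ *
+ *   folly::RWSpinLock lock;
+ *   {
+ *     folly::RWSpinLock::ReadHolder guard(&lock);   // shared lock
+ *     // ... read ...
+ *   }
+ *   {
+ *     folly::RWSpinLock::WriteHolder guard(&lock);  // exclusive lock
+ *     // ... write ...
+ *   }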
+ * + * Caveats: + * + * RWTicketSpinLock locks can only be used with GCC on x86/x86-64 + * based systems. + * + * RWTicketSpinLock<32> only allows up to 2^8 - 1 concurrent + * readers and writers. + * + * RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent + * readers and writers. + * + * RWSpinLock handles 2^30 - 1 concurrent readers. + * + * @author Xin Liu + */ + +#ifndef FOLLY_RWSPINLOCK_H_ +#define FOLLY_RWSPINLOCK_H_ + +/* +======================================================================== +Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) 8 cores(16 HTs) +======================================================================== + +------------------------------------------------------------------------------ +1. Single thread benchmark (read/write lock + unlock overhead) +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------- +* BM_RWSpinLockRead 100000 1.786 ms 17.86 ns 53.4M ++30.5% BM_RWSpinLockWrite 100000 2.331 ms 23.31 ns 40.91M ++85.7% BM_RWTicketSpinLock32Read 100000 3.317 ms 33.17 ns 28.75M ++96.0% BM_RWTicketSpinLock32Write 100000 3.5 ms 35 ns 27.25M ++85.6% BM_RWTicketSpinLock64Read 100000 3.315 ms 33.15 ns 28.77M ++96.0% BM_RWTicketSpinLock64Write 100000 3.5 ms 35 ns 27.25M ++85.7% BM_RWTicketSpinLock32FavorWriterRead 100000 3.317 ms 33.17 ns 28.75M ++29.7% BM_RWTicketSpinLock32FavorWriterWrite 100000 2.316 ms 23.16 ns 41.18M ++85.3% BM_RWTicketSpinLock64FavorWriterRead 100000 3.309 ms 33.09 ns 28.82M ++30.2% BM_RWTicketSpinLock64FavorWriterWrite 100000 2.325 ms 23.25 ns 41.02M ++ 175% BM_PThreadRWMutexRead 100000 4.917 ms 49.17 ns 19.4M ++ 166% BM_PThreadRWMutexWrite 100000 4.757 ms 47.57 ns 20.05M + +------------------------------------------------------------------------------ +2. Contention Benchmark 90% read 10% write +Benchmark hits average min max sigma +------------------------------------------------------------------------------ +---------- 8 threads ------------ +RWSpinLock Write 142666 220ns 78ns 40.8us 269ns +RWSpinLock Read 1282297 222ns 80ns 37.7us 248ns +RWTicketSpinLock Write 85692 209ns 71ns 17.9us 252ns +RWTicketSpinLock Read 769571 215ns 78ns 33.4us 251ns +pthread_rwlock_t Write 84248 2.48us 99ns 269us 8.19us +pthread_rwlock_t Read 761646 933ns 101ns 374us 3.25us + +---------- 16 threads ------------ +RWSpinLock Write 124236 237ns 78ns 261us 801ns +RWSpinLock Read 1115807 236ns 78ns 2.27ms 2.17us +RWTicketSpinLock Write 81781 231ns 71ns 31.4us 351ns +RWTicketSpinLock Read 734518 238ns 78ns 73.6us 379ns +pthread_rwlock_t Write 83363 7.12us 99ns 785us 28.1us +pthread_rwlock_t Read 754978 2.18us 101ns 1.02ms 14.3us + +---------- 50 threads ------------ +RWSpinLock Write 131142 1.37us 82ns 7.53ms 68.2us +RWSpinLock Read 1181240 262ns 78ns 6.62ms 12.7us +RWTicketSpinLock Write 83045 397ns 73ns 7.01ms 31.5us +RWTicketSpinLock Read 744133 386ns 78ns 11ms 31.4us +pthread_rwlock_t Write 80849 112us 103ns 4.52ms 263us +pthread_rwlock_t Read 728698 24us 101ns 7.28ms 194us + +*/ + +#if defined(__GNUC__) && (defined(__i386) || defined(__x86_64__) || \ + defined(ARCH_K8)) +#define RW_SPINLOCK_USE_X86_INTRINSIC_ +#include +#else +#undef RW_SPINLOCK_USE_X86_INTRINSIC_ +#endif + +#include +#include +#include +#include + +#include +#include + +#include "folly/Likely.h" + +namespace folly { + +/* + * A simple, small (4-bytes), but unfair rwlock. 
Use it when you want + * a nice writer and don't expect a lot of write/read contention, or + * when you need small rwlocks since you are creating a large number + * of them. + * + * Note that the unfairness here is extreme: if the lock is + * continually accessed for read, writers will never get a chance. If + * the lock can be that highly contended this class is probably not an + * ideal choice anyway. + * + * It currently implements most of the Lockable, SharedLockable and + * UpgradeLockable concepts except the TimedLockable related locking/unlocking + * interfaces. + */ +class RWSpinLock : boost::noncopyable { + enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 }; + public: + RWSpinLock() : bits_(0) {} + + // Lockable Concept + void lock() { + int count = 0; + while (!LIKELY(try_lock())) { + if (++count > 1000) sched_yield(); + } + } + + // Writer is responsible for clearing up both the UPGRADED and WRITER bits. + void unlock() { + static_assert(READER > WRITER + UPGRADED, "wrong bits!"); + bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release); + } + + // SharedLockable Concept + void lock_shared() { + int count = 0; + while (!LIKELY(try_lock_shared())) { + if (++count > 1000) sched_yield(); + } + } + + void unlock_shared() { + bits_.fetch_add(-READER, std::memory_order_release); + } + + // Downgrade the lock from writer status to reader status. + void unlock_and_lock_shared() { + bits_.fetch_add(READER, std::memory_order_acquire); + unlock(); + } + + // UpgradeLockable Concept + void lock_upgrade() { + int count = 0; + while (!try_lock_upgrade()) { + if (++count > 1000) sched_yield(); + } + } + + void unlock_upgrade() { + bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel); + } + + // unlock upgrade and try to acquire write lock + void unlock_upgrade_and_lock() { + int64_t count = 0; + while (!try_unlock_upgrade_and_lock()) { + if (++count > 1000) sched_yield(); + } + } + + // unlock upgrade and read lock atomically + void unlock_upgrade_and_lock_shared() { + bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel); + } + + void unlock_shared_and_lock_upgrade() { + lock_upgrade(); + unlock_shared(); + } + + // write unlock and upgrade lock atomically + void unlock_and_lock_upgrade() { + // need to do it in two steps here -- as the UPGRADED bit might be OR-ed at + // the same time when other threads are trying do try_lock_upgrade(). + bits_.fetch_or(UPGRADED, std::memory_order_acquire); + bits_.fetch_add(-WRITER, std::memory_order_release); + } + + + // Attempt to acquire writer permission. Return false if we didn't get it. + bool try_lock() { + int32_t expect = 0; + return bits_.compare_exchange_strong(expect, WRITER, + std::memory_order_acq_rel); + } + + // Try to get reader permission on the lock. This can fail if we + // find out someone is a writer. + bool try_lock_shared() { + // fetch_add is considerably (100%) faster than compare_exchange, + // so here we are optimizing for the common (lock success) case. + int32_t value = bits_.fetch_add(READER, std::memory_order_acquire); + if (UNLIKELY(value & WRITER)) { + bits_.fetch_add(-READER, std::memory_order_release); + return false; + } + return true; + } + + // try to unlock upgrade and write lock atomically + bool try_unlock_upgrade_and_lock() { + int32_t expect = UPGRADED; + return bits_.compare_exchange_strong(expect, WRITER, + std::memory_order_acq_rel); + } + + // try to acquire an upgradable lock. 
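+  // An upgradable holder can coexist with readers, but excludes other
+  // upgraders and writers; it can later be promoted to a full write
+  // lock via unlock_upgrade_and_lock().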
+ bool try_lock_upgrade() { + int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire); + + // Note: when failed, we cannot flip the UPGRADED bit back, + // as in this case there is either another upgrade lock or a write lock. + // If it's a write lock, the bit will get cleared up when that lock's done + // with unlock(). + return ((value & (UPGRADED | WRITER)) == 0); + } + + // mainly for debugging purposes. + int32_t bits() const { return bits_.load(std::memory_order_acquire); } + + class ReadHolder; + class UpgradedHolder; + class WriteHolder; + + class ReadHolder { + public: + explicit ReadHolder(RWSpinLock* lock = nullptr) : lock_(lock) { + if (lock_) lock_->lock_shared(); + } + + explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) { + lock_->lock_shared(); + } + + ReadHolder(ReadHolder&& other) : lock_(other.lock_) { + other.lock_ = nullptr; + } + + // down-grade + explicit ReadHolder(UpgradedHolder&& upgraded) : lock_(upgraded.lock_) { + upgraded.lock_ = nullptr; + if (lock_) lock_->unlock_upgrade_and_lock_shared(); + } + + explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) { + writer.lock_ = nullptr; + if (lock_) lock_->unlock_and_lock_shared(); + } + + ReadHolder& operator=(ReadHolder&& other) { + using std::swap; + swap(lock_, other.lock_); + return *this; + } + + ReadHolder(const ReadHolder& other) = delete; + ReadHolder& operator=(const ReadHolder& other) = delete; + + ~ReadHolder() { if (lock_) lock_->unlock_shared(); } + + void reset(RWSpinLock* lock = nullptr) { + if (lock == lock_) return; + if (lock_) lock_->unlock_shared(); + lock_ = lock; + if (lock_) lock_->lock_shared(); + } + + void swap(ReadHolder* other) { + std::swap(lock_, other->lock_); + } + + private: + friend class UpgradedHolder; + friend class WriteHolder; + RWSpinLock* lock_; + }; + + class UpgradedHolder { + public: + explicit UpgradedHolder(RWSpinLock* lock = nullptr) : lock_(lock) { + if (lock_) lock_->lock_upgrade(); + } + + explicit UpgradedHolder(RWSpinLock& lock) : lock_(&lock) { + lock_->lock_upgrade(); + } + + explicit UpgradedHolder(ReadHolder&& reader) { + lock_ = reader.lock_; + reader.lock_ = nullptr; + if (lock_) lock_->unlock_shared_and_lock_upgrade(); + } + + explicit UpgradedHolder(WriteHolder&& writer) { + lock_ = writer.lock_; + writer.lock_ = nullptr; + if (lock_) lock_->unlock_and_lock_upgrade(); + } + + UpgradedHolder(UpgradedHolder&& other) : lock_(other.lock_) { + other.lock_ = nullptr; + } + + UpgradedHolder& operator =(UpgradedHolder&& other) { + using std::swap; + swap(lock_, other.lock_); + return *this; + } + + UpgradedHolder(const UpgradedHolder& other) = delete; + UpgradedHolder& operator =(const UpgradedHolder& other) = delete; + + ~UpgradedHolder() { if (lock_) lock_->unlock_upgrade(); } + + void reset(RWSpinLock* lock = nullptr) { + if (lock == lock_) return; + if (lock_) lock_->unlock_upgrade(); + lock_ = lock; + if (lock_) lock_->lock_upgrade(); + } + + void swap(UpgradedHolder* other) { + using std::swap; + swap(lock_, other->lock_); + } + + private: + friend class WriteHolder; + friend class ReadHolder; + RWSpinLock* lock_; + }; + + class WriteHolder { + public: + explicit WriteHolder(RWSpinLock* lock = nullptr) : lock_(lock) { + if (lock_) lock_->lock(); + } + + explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) { + lock_->lock(); + } + + // promoted from an upgrade lock holder + explicit WriteHolder(UpgradedHolder&& upgraded) { + lock_ = upgraded.lock_; + upgraded.lock_ = nullptr; + if (lock_) lock_->unlock_upgrade_and_lock(); + } + + 
WriteHolder(WriteHolder&& other) : lock_(other.lock_) { + other.lock_ = nullptr; + } + + WriteHolder& operator =(WriteHolder&& other) { + using std::swap; + swap(lock_, other.lock_); + return *this; + } + + WriteHolder(const WriteHolder& other) = delete; + WriteHolder& operator =(const WriteHolder& other) = delete; + + ~WriteHolder () { if (lock_) lock_->unlock(); } + + void reset(RWSpinLock* lock = nullptr) { + if (lock == lock_) return; + if (lock_) lock_->unlock(); + lock_ = lock; + if (lock_) lock_->lock(); + } + + void swap(WriteHolder* other) { + using std::swap; + swap(lock_, other->lock_); + } + + private: + friend class ReadHolder; + friend class UpgradedHolder; + RWSpinLock* lock_; + }; + + // Synchronized<> adaptors + friend void acquireRead(RWSpinLock& l) { return l.lock_shared(); } + friend void acquireReadWrite(RWSpinLock& l) { return l.lock(); } + friend void releaseRead(RWSpinLock& l) { return l.unlock_shared(); } + friend void releaseReadWrite(RWSpinLock& l) { return l.unlock(); } + + private: + std::atomic bits_; +}; + + +#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_ +// A more balanced Read-Write spin lock implemented based on GCC intrinsics. + +namespace detail { +template struct RWTicketIntTrait { + static_assert(kBitWidth == 32 || kBitWidth == 64, + "bit width has to be either 32 or 64 "); +}; + +template <> +struct RWTicketIntTrait<64> { + typedef uint64_t FullInt; + typedef uint32_t HalfInt; + typedef uint16_t QuarterInt; + +#ifdef __SSE2__ + static __m128i make128(const uint16_t v[4]) { + return _mm_set_epi16(0, 0, 0, 0, v[3], v[2], v[1], v[0]); + } + static inline __m128i fromInteger(uint64_t from) { + return _mm_cvtsi64_si128(from); + } + static inline uint64_t toInteger(__m128i in) { + return _mm_cvtsi128_si64(in); + } + static inline uint64_t addParallel(__m128i in, __m128i kDelta) { + return toInteger(_mm_add_epi16(in, kDelta)); + } +#endif +}; + +template <> +struct RWTicketIntTrait<32> { + typedef uint32_t FullInt; + typedef uint16_t HalfInt; + typedef uint8_t QuarterInt; + +#ifdef __SSE2__ + static __m128i make128(const uint8_t v[4]) { + return _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, v[3], v[2], v[1], v[0]); + } + static inline __m128i fromInteger(uint32_t from) { + return _mm_cvtsi32_si128(from); + } + static inline uint32_t toInteger(__m128i in) { + return _mm_cvtsi128_si32(in); + } + static inline uint32_t addParallel(__m128i in, __m128i kDelta) { + return toInteger(_mm_add_epi8(in, kDelta)); + } +#endif +}; +} // detail + + +template +class RWTicketSpinLockT : boost::noncopyable { + typedef detail::RWTicketIntTrait IntTraitType; + typedef typename detail::RWTicketIntTrait::FullInt FullInt; + typedef typename detail::RWTicketIntTrait::HalfInt HalfInt; + typedef typename detail::RWTicketIntTrait::QuarterInt + QuarterInt; + + union RWTicket { + FullInt whole; + HalfInt readWrite; + __extension__ struct { + QuarterInt write; + QuarterInt read; + QuarterInt users; + }; + } ticket; + + private: // Some x64-specific utilities for atomic access to ticket. 
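+  // (These helpers lean on x86's strong (TSO) memory model: an aligned
+  // plain load already has acquire semantics and a plain store has
+  // release semantics, so a compiler-only barrier is sufficient.)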
+ template static T load_acquire(T* addr) { + T t = *addr; // acquire barrier + asm volatile("" : : : "memory"); + return t; + } + + template + static void store_release(T* addr, T v) { + asm volatile("" : : : "memory"); + *addr = v; // release barrier + } + + public: + + RWTicketSpinLockT() { + store_release(&ticket.whole, FullInt(0)); + } + + void lock() { + if (kFavorWriter) { + writeLockAggressive(); + } else { + writeLockNice(); + } + } + + /* + * Both try_lock and try_lock_shared diverge in our implementation from the + * lock algorithm described in the link above. + * + * In the read case, it is undesirable that the readers could wait + * for another reader (before increasing ticket.read in the other + * implementation). Our approach gives up on + * first-come-first-serve, but our benchmarks showed improve + * performance for both readers and writers under heavily contended + * cases, particularly when the number of threads exceeds the number + * of logical CPUs. + * + * We have writeLockAggressive() using the original implementation + * for a writer, which gives some advantage to the writer over the + * readers---for that path it is guaranteed that the writer will + * acquire the lock after all the existing readers exit. + */ + bool try_lock() { + RWTicket t; + FullInt old = t.whole = load_acquire(&ticket.whole); + if (t.users != t.write) return false; + ++t.users; + return __sync_bool_compare_and_swap(&ticket.whole, old, t.whole); + } + + /* + * Call this if you want to prioritize writer to avoid starvation. + * Unlike writeLockNice, immediately acquires the write lock when + * the existing readers (arriving before the writer) finish their + * turns. + */ + void writeLockAggressive() { + QuarterInt val = __sync_fetch_and_add(&ticket.users, 1); + while (val != load_acquire(&ticket.write)) { + asm volatile("pause"); + } + } + + // Call this when the writer should be nicer to the readers. + void writeLockNice() { + // Here it doesn't cpu-relax the writer. + // + // This is because usually we have many more readers than the + // writers, so the writer has less chance to get the lock when + // there are a lot of competing readers. The aggressive spinning + // can help to avoid starving writers. + while (!try_lock()) {} + } + + // Atomically unlock the write-lock from writer and acquire the read-lock. + void unlock_and_lock_shared() { + QuarterInt val = __sync_fetch_and_add(&ticket.read, 1); + } + + // Release writer permission on the lock. 
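+  // Releasing the write lock advances both the read and the write
+  // turns, so whichever waiter is next -- a queued reader or the next
+  // writer -- may proceed; the SSE2 path below performs both ticket
+  // increments with a single parallel add.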
+ void unlock() { + RWTicket t; + t.whole = load_acquire(&ticket.whole); + FullInt old = t.whole; + +#ifdef __SSE2__ + // SSE2 can reduce the lock and unlock overhead by 10% + static const QuarterInt kDeltaBuf[4] = { 1, 1, 0, 0 }; // write/read/user + static const __m128i kDelta = IntTraitType::make128(kDeltaBuf); + __m128i m = IntTraitType::fromInteger(old); + t.whole = IntTraitType::addParallel(m, kDelta); +#else + ++t.read; + ++t.write; +#endif + store_release(&ticket.readWrite, t.readWrite); + } + + void lock_shared() { + while (!LIKELY(try_lock_shared())) { + asm volatile("pause"); + } + } + + bool try_lock_shared() { + RWTicket t, old; + old.whole = t.whole = load_acquire(&ticket.whole); + old.users = old.read; +#ifdef __SSE2__ + // SSE2 may reduce the total lock and unlock overhead by 10% + static const QuarterInt kDeltaBuf[4] = { 0, 1, 1, 0 }; // write/read/user + static const __m128i kDelta = IntTraitType::make128(kDeltaBuf); + __m128i m = IntTraitType::fromInteger(old.whole); + t.whole = IntTraitType::addParallel(m, kDelta); +#else + ++t.read; + ++t.users; +#endif + return __sync_bool_compare_and_swap(&ticket.whole, old.whole, t.whole); + } + + void unlock_shared() { + QuarterInt val = __sync_fetch_and_add(&ticket.write, 1); + } + + class WriteHolder; + + typedef RWTicketSpinLockT RWSpinLock; + class ReadHolder : boost::noncopyable { + public: + explicit ReadHolder(RWSpinLock *lock = nullptr) : + lock_(lock) { + if (lock_) lock_->lock_shared(); + } + + explicit ReadHolder(RWSpinLock &lock) : lock_ (&lock) { + if (lock_) lock_->lock_shared(); + } + + // atomically unlock the write-lock from writer and acquire the read-lock + explicit ReadHolder(WriteHolder *writer) : lock_(nullptr) { + std::swap(this->lock_, writer->lock_); + if (lock_) { + lock_->unlock_and_lock_shared(); + } + } + + ~ReadHolder() { + if (lock_) lock_->unlock_shared(); + } + + void reset(RWSpinLock *lock = nullptr) { + if (lock_) lock_->unlock_shared(); + lock_ = lock; + if (lock_) lock_->lock_shared(); + } + + void swap(ReadHolder *other) { + std::swap(this->lock_, other->lock_); + } + + private: + RWSpinLock *lock_; + }; + + class WriteHolder : boost::noncopyable { + public: + explicit WriteHolder(RWSpinLock *lock = nullptr) : lock_(lock) { + if (lock_) lock_->lock(); + } + explicit WriteHolder(RWSpinLock &lock) : lock_ (&lock) { + if (lock_) lock_->lock(); + } + + ~WriteHolder() { + if (lock_) lock_->unlock(); + } + + void reset(RWSpinLock *lock = nullptr) { + if (lock == lock_) return; + if (lock_) lock_->unlock(); + lock_ = lock; + if (lock_) lock_->lock(); + } + + void swap(WriteHolder *other) { + std::swap(this->lock_, other->lock_); + } + + private: + friend class ReadHolder; + RWSpinLock *lock_; + }; + + // Synchronized<> adaptors. 
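+  // These friend functions are the hooks folly::Synchronized uses to
+  // lock and unlock this mutex type.  Note that the timed overload of
+  // acquireReadWrite below simply acquires the lock and reports
+  // success, since this spin lock has no timed-acquire primitive.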
+ friend void acquireRead(RWTicketSpinLockT& mutex) { + mutex.lock_shared(); + } + friend void acquireReadWrite(RWTicketSpinLockT& mutex) { + mutex.lock(); + } + friend bool acquireReadWrite(RWTicketSpinLockT& mutex, + unsigned int milliseconds) { + mutex.lock(); + return true; + } + friend void releaseRead(RWTicketSpinLockT& mutex) { + mutex.unlock_shared(); + } + friend void releaseReadWrite(RWTicketSpinLockT& mutex) { + mutex.unlock(); + } +}; + +typedef RWTicketSpinLockT<32> RWTicketSpinLock32; +typedef RWTicketSpinLockT<64> RWTicketSpinLock64; + +#endif // RW_SPINLOCK_USE_X86_INTRINSIC_ + +} // namespace folly + +#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_ +#undef RW_SPINLOCK_USE_X86_INTRINSIC_ +#endif + +#endif // FOLLY_RWSPINLOCK_H_ diff --git a/folly/Random.cpp b/folly/Random.cpp new file mode 100644 index 00000000..13664ce8 --- /dev/null +++ b/folly/Random.cpp @@ -0,0 +1,35 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Random.h" + +#include +#include + +namespace folly { + +uint32_t randomNumberSeed() { + struct timeval tv; + gettimeofday(&tv, NULL); + const uint32_t kPrime1 = 61631; + const uint32_t kPrime2 = 64997; + const uint32_t kPrime3 = 111857; + return kPrime1 * static_cast(getpid()) + + kPrime2 * static_cast(tv.tv_sec) + + kPrime3 * static_cast(tv.tv_usec); +} + +} diff --git a/folly/Random.h b/folly/Random.h new file mode 100644 index 00000000..6532855b --- /dev/null +++ b/folly/Random.h @@ -0,0 +1,31 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_BASE_RANDOM_H_ +#define FOLLY_BASE_RANDOM_H_ + +#include + +namespace folly { + +/* + * Return a good seed for a random number generator. + */ +uint32_t randomNumberSeed(); + +} + +#endif diff --git a/folly/Range.cpp b/folly/Range.cpp new file mode 100644 index 00000000..45ffd39d --- /dev/null +++ b/folly/Range.cpp @@ -0,0 +1,37 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// @author Mark Rabkin (mrabkin@fb.com) +// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) +// + +#include "folly/Range.h" + +namespace folly { + +/** +Predicates that can be used with qfind and startsWith + */ +const AsciiCaseSensitive asciiCaseSensitive = AsciiCaseSensitive(); +const AsciiCaseInsensitive asciiCaseInsensitive = AsciiCaseInsensitive(); + +std::ostream& operator<<(std::ostream& os, const StringPiece& piece) { + os.write(piece.start(), piece.size()); + return os; +} + +} // namespace folly diff --git a/folly/Range.h b/folly/Range.h new file mode 100644 index 00000000..e628cd00 --- /dev/null +++ b/folly/Range.h @@ -0,0 +1,552 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// @author Mark Rabkin (mrabkin@fb.com) +// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) + +#ifndef FOLLY_RANGE_H_ +#define FOLLY_RANGE_H_ + +#include "folly/FBString.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "folly/Traits.h" + +namespace folly { + +template class Range; + +/** +Finds the first occurrence of needle in haystack. The algorithm is on +average faster than O(haystack.size() * needle.size()) but not as fast +as Boyer-Moore. On the upside, it does not do any upfront +preprocessing and does not allocate memory. + */ +template +inline size_t qfind(const Range & haystack, + const Range & needle); + +/** +Finds the first occurrence of needle in haystack. The result is the +offset reported to the beginning of haystack, or string::npos if +needle wasn't found. + */ +template +size_t qfind(const Range & haystack, + const typename Range::value_type& needle); + +/** + * Small internal helper - returns the value just before an iterator. + */ +namespace detail { + +/** + * For random-access iterators, the value before is simply i[-1]. + */ +template +typename boost::enable_if_c< + boost::is_same::iterator_category, + std::random_access_iterator_tag>::value, + typename std::iterator_traits::reference>::type +value_before(Iter i) { + return i[-1]; +} + +/** + * For all other iterators, we need to use the decrement operator. + */ +template +typename boost::enable_if_c< + !boost::is_same::iterator_category, + std::random_access_iterator_tag>::value, + typename std::iterator_traits::reference>::type +value_before(Iter i) { + return *--i; +} + +} // namespace detail + +/** + * Range abstraction keeping a pair of iterators. We couldn't use + * boost's similar range abstraction because we need an API identical + * with the former StringPiece class, which is used by a lot of other + * code. This abstraction does fulfill the needs of boost's + * range-oriented algorithms though. + * + * (Keep memory lifetime in mind when using this class, since it + * doesn't manage the data it refers to - just like an iterator + * wouldn't.) 
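+ *
+ * A short usage sketch (illustrative only):
+ *
+ *   folly::StringPiece s("hello world");  // a Range of const char*
+ *   s.advance(6);                         // s now views "world"
+ *   size_t pos = s.find("orl");           // == 1
+ *   std::string copy = s.str();           // "world"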
+ */ +template +class Range : private boost::totally_ordered > { +public: + typedef std::size_t size_type; + typedef Iter iterator; + typedef Iter const_iterator; + typedef typename boost::remove_reference< + typename std::iterator_traits::reference>::type + value_type; + typedef typename std::iterator_traits::reference reference; + typedef std::char_traits traits_type; + + static const size_type npos = -1; + + // Works for all iterators + Range() : b_(), e_() { + } + +private: + static bool reachable(Iter b, Iter e, std::forward_iterator_tag) { + for (; b != e; ++b) { + LOG_EVERY_N(INFO, 100000) << __FILE__ ":" << __LINE__ + << " running reachability test (" + << google::COUNTER << " iterations)..."; + } + return true; + } + + static bool reachable(Iter b, Iter e, std::random_access_iterator_tag) { + return b <= e; + } + +public: + // Works for all iterators + Range(Iter start, Iter end) + : b_(start), e_(end) { + assert(reachable(b_, e_, + typename std::iterator_traits::iterator_category())); + } + + // Works only for random-access iterators + Range(Iter start, size_t size) + : b_(start), e_(start + size) { } + + // Works only for Range + /* implicit */ Range(Iter str) + : b_(str), e_(b_ + strlen(str)) {} + // Works only for Range + /* implicit */ Range(const std::string& str) + : b_(str.data()), e_(b_ + str.size()) {} + // Works only for Range + Range(const std::string& str, std::string::size_type startFrom) { + CHECK_LE(startFrom, str.size()); + b_ = str.data() + startFrom; + e_ = str.data() + str.size(); + } + // Works only for Range + Range(const std::string& str, + std::string::size_type startFrom, + std::string::size_type size) { + CHECK_LE(startFrom + size, str.size()); + b_ = str.data() + startFrom; + e_ = b_ + size; + } + // Works only for Range + /* implicit */ Range(const fbstring& str) + : b_(str.data()), e_(b_ + str.size()) { } + // Works only for Range + Range(const fbstring& str, fbstring::size_type startFrom) { + CHECK_LE(startFrom, str.size()); + b_ = str.data() + startFrom; + e_ = str.data() + str.size(); + } + // Works only for Range + Range(const fbstring& str, fbstring::size_type startFrom, + fbstring::size_type size) { + CHECK_LE(startFrom + size, str.size()); + b_ = str.data() + startFrom; + e_ = b_ + size; + } + + void clear() { + b_ = Iter(); + e_ = Iter(); + } + + void assign(Iter start, Iter end) { + b_ = start; + e_ = end; + } + + void reset(Iter start, size_type size) { + b_ = start; + e_ = start + size; + } + + // Works only for Range + void reset(const std::string& str) { + reset(str.data(), str.size()); + } + + size_type size() const { + assert(b_ <= e_); + return e_ - b_; + } + size_type walk_size() const { + assert(b_ <= e_); + return std::distance(b_, e_); + } + bool empty() const { return b_ == e_; } + Iter data() const { return b_; } + Iter start() const { return b_; } + Iter begin() const { return b_; } + Iter end() const { return e_; } + Iter cbegin() const { return b_; } + Iter cend() const { return e_; } + value_type& front() { + assert(b_ < e_); + return *b_; + } + value_type& back() { + assert(b_ < e_); + return detail::value_before(e_); + } + const value_type& front() const { + assert(b_ < e_); + return *b_; + } + const value_type& back() const { + assert(b_ < e_); + return detail::value_before(e_); + } + // Works only for Range + std::string str() const { return std::string(b_, size()); } + std::string toString() const { return str(); } + // Works only for Range + fbstring fbstr() const { return fbstring(b_, size()); } + fbstring toFbstring() 
const { return fbstr(); } + + // Works only for Range + int compare(const Range& o) const { + const size_type tsize = this->size(); + const size_type osize = o.size(); + const size_type msize = std::min(tsize, osize); + int r = traits_type::compare(data(), o.data(), msize); + if (r == 0) r = tsize - osize; + return r; + } + + value_type& operator[](size_t i) { + CHECK_GT(size(), i); + return b_[i]; + } + + const value_type& operator[](size_t i) const { + CHECK_GT(size(), i); + return b_[i]; + } + + value_type& at(size_t i) { + if (i >= size()) throw std::out_of_range("index out of range"); + return b_[i]; + } + + const value_type& at(size_t i) const { + if (i >= size()) throw std::out_of_range("index out of range"); + return b_[i]; + } + + // Works only for Range + uint32_t hash() const { + // Taken from fbi/nstring.h: + // Quick and dirty bernstein hash...fine for short ascii strings + uint32_t hash = 5381; + for (size_t ix = 0; ix < size(); ix++) { + hash = ((hash << 5) + hash) + b_[ix]; + } + return hash; + } + + void advance(size_type n) { + CHECK_LE(n, size()); + b_ += n; + } + + void subtract(size_type n) { + CHECK_LE(n, size()); + e_ -= n; + } + + void pop_front() { + assert(b_ < e_); + ++b_; + } + + void pop_back() { + assert(b_ < e_); + --e_; + } + + Range subpiece(size_type first, + size_type length = std::string::npos) const { + CHECK_LE(first, size()); + return Range(b_ + first, + std::min(length, size() - first)); + } + + // string work-alike functions + size_type find(Range str) const { + return qfind(*this, str); + } + + size_type find(Range str, size_t pos) const { + if (pos > size()) return std::string::npos; + size_t ret = qfind(subpiece(pos), str); + return ret == npos ? ret : ret + pos; + } + + size_type find(Iter s, size_t pos, size_t n) const { + if (pos > size()) return std::string::npos; + size_t ret = qfind(pos ? subpiece(pos) : *this, Range(s, n)); + return ret == npos ? ret : ret + pos; + } + + size_type find(const Iter s) const { + return qfind(*this, Range(s)); + } + + size_type find(const Iter s, size_t pos) const { + if (pos > size()) return std::string::npos; + size_type ret = qfind(subpiece(pos), Range(s)); + return ret == npos ? ret : ret + pos; + } + + size_type find(value_type c) const { + return qfind(*this, c); + } + + size_type find(value_type c, size_t pos) const { + if (pos > size()) return std::string::npos; + size_type ret = qfind(subpiece(pos), c); + return ret == npos ? ret : ret + pos; + } + + void swap(Range& rhs) { + std::swap(b_, rhs.b_); + std::swap(e_, rhs.e_); + } + +private: + Iter b_, e_; +}; + +template +const typename Range::size_type Range::npos; + +template +void swap(Range& lhs, Range& rhs) { + lhs.swap(rhs); +} + +/** + * Create a range from two iterators, with type deduction. 
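+ *
+ * For example, given a hypothetical std::vector<int> v:
+ *
+ *   auto r = folly::makeRange(v.begin(), v.end());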
+ */ +template +Range makeRange(Iter first, Iter last) { + return Range(first, last); +} + +typedef Range StringPiece; + +std::ostream& operator<<(std::ostream& os, const StringPiece& piece); + +/** + * Templated comparison operators + */ + +template +inline bool operator==(const Range& lhs, const Range& rhs) { + return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; +} + +template +inline bool operator<(const Range& lhs, const Range& rhs) { + return lhs.compare(rhs) < 0; +} + +/** + * Specializations of comparison operators for StringPiece + */ + +namespace detail { + +template +struct ComparableAsStringPiece { + enum { + value = + (boost::is_convertible::value + && boost::is_same::value) + || + (boost::is_convertible::value + && boost::is_same::value) + }; +}; + +} // namespace detail + +/** + * operator== through conversion for Range + */ +template +typename +boost::enable_if_c::value, bool>::type +operator==(const T& lhs, const U& rhs) { + return StringPiece(lhs) == StringPiece(rhs); +} + +/** + * operator< through conversion for Range + */ +template +typename +boost::enable_if_c::value, bool>::type +operator<(const T& lhs, const U& rhs) { + return StringPiece(lhs) < StringPiece(rhs); +} + +/** + * operator> through conversion for Range + */ +template +typename +boost::enable_if_c::value, bool>::type +operator>(const T& lhs, const U& rhs) { + return StringPiece(lhs) > StringPiece(rhs); +} + +/** + * operator< through conversion for Range + */ +template +typename +boost::enable_if_c::value, bool>::type +operator<=(const T& lhs, const U& rhs) { + return StringPiece(lhs) <= StringPiece(rhs); +} + +/** + * operator> through conversion for Range + */ +template +typename +boost::enable_if_c::value, bool>::type +operator>=(const T& lhs, const U& rhs) { + return StringPiece(lhs) >= StringPiece(rhs); +} + +struct StringPieceHash { + std::size_t operator()(const StringPiece& str) const { + return static_cast(str.hash()); + } +}; + +/** + * Finds substrings faster than brute force by borrowing from Boyer-Moore + */ +template +size_t qfind(const Range& haystack, + const Range& needle, + Comp eq) { + // Don't use std::search, use a Boyer-Moore-like trick by comparing + // the last characters first + auto const nsize = needle.size(); + if (haystack.size() < nsize) { + return std::string::npos; + } + if (!nsize) return 0; + auto const nsize_1 = nsize - 1; + auto const lastNeedle = needle[nsize_1]; + + // Boyer-Moore skip value for the last char in the needle. Zero is + // not a valid value; skip will be computed the first time it's + // needed. 
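+  // (For example, with needle "abcab" the lazily computed skip is 3,
+  // since the last 'b' also occurs three positions earlier, at index 1.)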
+ std::string::size_type skip = 0; + + auto i = haystack.begin(); + auto iEnd = haystack.end() - nsize_1; + + while (i < iEnd) { + // Boyer-Moore: match the last element in the needle + while (!eq(i[nsize_1], lastNeedle)) { + if (++i == iEnd) { + // not found + return std::string::npos; + } + } + // Here we know that the last char matches + // Continue in pedestrian mode + for (size_t j = 0; ; ) { + assert(j < nsize); + if (!eq(i[j], needle[j])) { + // Not found, we can skip + // Compute the skip value lazily + if (skip == 0) { + skip = 1; + while (skip <= nsize_1 && !eq(needle[nsize_1 - skip], lastNeedle)) { + ++skip; + } + } + i += skip; + break; + } + // Check if done searching + if (++j == nsize) { + // Yay + return i - haystack.begin(); + } + } + } + return std::string::npos; +} + +struct AsciiCaseSensitive { + bool operator()(char lhs, char rhs) const { + return lhs == rhs; + } +}; + +struct AsciiCaseInsensitive { + bool operator()(char lhs, char rhs) const { + return toupper(lhs) == toupper(rhs); + } +}; + +extern const AsciiCaseSensitive asciiCaseSensitive; +extern const AsciiCaseInsensitive asciiCaseInsensitive; + +template +size_t qfind(const Range& haystack, + const Range& needle) { + return qfind(haystack, needle, asciiCaseSensitive); +} + +template +size_t qfind(const Range& haystack, + const typename Range::value_type& needle) { + return qfind(haystack, makeRange(&needle, &needle + 1)); +} + +} // !namespace folly + +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(folly::Range); + +#endif // FOLLY_RANGE_H_ diff --git a/folly/SConstruct.double-conversion b/folly/SConstruct.double-conversion new file mode 100644 index 00000000..44ab44ad --- /dev/null +++ b/folly/SConstruct.double-conversion @@ -0,0 +1,21 @@ +double_conversion_sources = ['src/' + x for x in SConscript('src/SConscript')] +double_conversion_test_sources = ['test/cctest/' + x for x in SConscript('test/cctest/SConscript')] +test = double_conversion_sources + double_conversion_test_sources +print(test) +env = Environment(CPPPATH='#/src') +debug = ARGUMENTS.get('debug', 0) +optimize = ARGUMENTS.get('optimize', 0) +if int(debug): + env.Append(CCFLAGS = '-g -Wall -Werror') +if int(optimize): + env.Append(CCFLAGS = '-O3') +print double_conversion_sources +print double_conversion_test_sources +double_conversion_shared_objects = [ + env.SharedObject(src) for src in double_conversion_sources] +double_conversion_static_objects = [ + env.StaticObject(src) for src in double_conversion_sources] +library_name = 'double_conversion' +static_lib = env.StaticLibrary(library_name, double_conversion_static_objects) +env.StaticLibrary(library_name + '_pic', double_conversion_shared_objects) +env.Program('run_tests', double_conversion_test_sources, LIBS=[static_lib]) diff --git a/folly/ScopeGuard.h b/folly/ScopeGuard.h new file mode 100644 index 00000000..ae1bae67 --- /dev/null +++ b/folly/ScopeGuard.h @@ -0,0 +1,157 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FOLLY_SCOPEGUARD_H_ +#define FOLLY_SCOPEGUARD_H_ + +#include +#include +#include +#include + +#include "folly/Preprocessor.h" + +namespace folly { + +/** + * ScopeGuard is a general implementation of the "Initilization is + * Resource Acquisition" idiom. Basically, it guarantees that a function + * is executed upon leaving the currrent scope unless otherwise told. + * + * The makeGuard() function is used to create a new ScopeGuard object. + * It can be instantiated with a lambda function, a std::function, + * a functor, or a void(*)() function pointer. + * + * + * Usage example: Add a friend to memory iff it is also added to the db. + * + * void User::addFriend(User& newFriend) { + * // add the friend to memory + * friends_.push_back(&newFriend); + * + * // If the db insertion that follows fails, we should + * // remove it from memory. + * // (You could also declare this as "auto guard = makeGuard(...)") + * ScopeGuard guard = makeGuard([&] { friends_.pop_back(); }); + * + * // this will throw an exception upon error, which + * // makes the ScopeGuard execute UserCont::pop_back() + * // once the Guard's destructor is called. + * db_->addFriend(GetName(), newFriend.GetName()); + * + * // an exception was not thrown, so don't execute + * // the Guard. + * guard.dismiss(); + * } + * + * Examine ScopeGuardTest.cpp for some more sample usage. + * + * Stolen from: + * Andrei's and Petru Marginean's CUJ article: + * http://drdobbs.com/184403758 + * and the loki library: + * http://loki-lib.sourceforge.net/index.php?n=Idioms.ScopeGuardPointer + * and triendl.kj article: + * http://www.codeproject.com/KB/cpp/scope_guard.aspx + */ +class ScopeGuardImplBase { + public: + void dismiss() noexcept { + dismissed_ = true; + } + + protected: + ScopeGuardImplBase() + : dismissed_(false) {} + + ScopeGuardImplBase(ScopeGuardImplBase&& other) + : dismissed_(other.dismissed_) { + other.dismissed_ = true; + } + + bool dismissed_; +}; + +template +class ScopeGuardImpl : public ScopeGuardImplBase { + public: + explicit ScopeGuardImpl(const FunctionType& fn) + : function_(fn) {} + + explicit ScopeGuardImpl(FunctionType&& fn) + : function_(std::move(fn)) {} + + ScopeGuardImpl(ScopeGuardImpl&& other) + : ScopeGuardImplBase(std::move(other)), + function_(std::move(other.function_)) { + } + + ~ScopeGuardImpl() noexcept { + if (!dismissed_) { + execute(); + } + } + +private: + void* operator new(size_t) = delete; + + void execute() noexcept { + try { + function_(); + } catch (const std::exception& ex) { + LOG(FATAL) << "ScopeGuard cleanup function threw a " << + typeid(ex).name() << "exception: " << ex.what(); + } catch (...) { + LOG(FATAL) << "ScopeGuard cleanup function threw a non-exception object"; + } + } + + FunctionType function_; +}; + +template +ScopeGuardImpl::type> +makeGuard(FunctionType&& fn) { + return ScopeGuardImpl::type>( + std::forward(fn)); +} + +/** + * This is largely unneeded if you just use auto for your guards. 
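+ *
+ * (Added example of the two equivalent spellings; cleanup() stands for
+ * any callable you want run at scope exit:
+ *
+ *   ScopeGuard guard = makeGuard([&] { cleanup(); });
+ *   auto guard2 = makeGuard([&] { cleanup(); });
+ * )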
+ */ +typedef ScopeGuardImplBase&& ScopeGuard; + +namespace detail { +/** + * Internal use for the macro SCOPE_EXIT below + */ +enum class ScopeGuardOnExit {}; + +template +ScopeGuardImpl::type> +operator+(detail::ScopeGuardOnExit, FunctionType&& fn) { + return ScopeGuardImpl::type>( + std::forward(fn)); +} +} // namespace detail + +} // folly + +#define SCOPE_EXIT \ + auto FB_ANONYMOUS_VARIABLE(SCOPE_EXIT_STATE) \ + = ::folly::detail::ScopeGuardOnExit() + [&] + +#endif // FOLLY_SCOPEGUARD_H_ diff --git a/folly/SmallLocks.h b/folly/SmallLocks.h new file mode 100644 index 00000000..91d2546a --- /dev/null +++ b/folly/SmallLocks.h @@ -0,0 +1,284 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_SMALLLOCKS_H_ +#define FOLLY_SMALLLOCKS_H_ + +/* + * This header defines a few very small mutex types. These are useful + * in highly memory-constrained environments where contention is + * unlikely. + * + * Note: these locks are for use when you aren't likely to contend on + * the critical section, or when the critical section is incredibly + * small. Given that, both of the locks defined in this header are + * inherently unfair: that is, the longer a thread is waiting, the + * longer it waits between attempts to acquire, so newer waiters are + * more likely to get the mutex. For the intended use-case this is + * fine. + * + * @author Keith Adams + * @author Jordan DeLong + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifndef __x86_64__ +# error "SmallLocks.h is currently x64-only." +#endif + +namespace folly { + +////////////////////////////////////////////////////////////////////// + +namespace detail { + + /* + * A helper object for the condended case. Starts off with eager + * spinning, and falls back to sleeping for small quantums. + */ + class Sleeper { + static const int32_t kMaxActiveSpin = 4000; + + uint32_t spinCount; + + public: + Sleeper() : spinCount(0) {} + + void wait() { + if (spinCount < kMaxActiveSpin) { + ++spinCount; + asm volatile("pause"); + } else { + /* + * Always sleep 0.5ms, assuming this will make the kernel put + * us down for whatever its minimum timer resolution is (in + * linux this varies by kernel version from 1ms to 10ms). + */ + struct timespec ts = { 0, 500000 }; + nanosleep(&ts, NULL); + } + } + }; + +} + +////////////////////////////////////////////////////////////////////// + +/* + * A really, *really* small spinlock for fine-grained locking of lots + * of teeny-tiny data. + * + * Zero initializing these is guaranteed to be as good as calling + * init(), since the free state is guaranteed to be all-bits zero. + * + * This class should be kept a POD, so we can used it in other packed + * structs (gcc does not allow __attribute__((packed)) on structs that + * contain non-POD data). This means avoid adding a constructor, or + * making some members private, etc. 
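+ *
+ * (Added usage sketch -- the lock provides lock()/unlock()/try_lock(),
+ * so std::lock_guard works, and zero-initialization leaves it FREE;
+ * Counter and bump() are illustrative names:
+ *
+ *   struct Counter {
+ *     MicroSpinLock lock;   // one byte
+ *     uint32_t value;
+ *   };
+ *   Counter c = {};         // lock starts in the FREE state
+ *
+ *   void bump() {
+ *     std::lock_guard<MicroSpinLock> guard(c.lock);
+ *     ++c.value;            // keep the critical section tiny
+ *   }
+ * )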
+ */ +struct MicroSpinLock { + enum { FREE = 0, LOCKED = 1 }; + uint8_t lock_; + + /* + * Atomically move lock_ from "compare" to "newval". Return boolean + * success. Do not play on or around. + */ + bool cas(uint8_t compare, uint8_t newVal) { + bool out; + asm volatile("lock; cmpxchgb %2, (%3);" + "setz %0;" + : "=r" (out) + : "a" (compare), // cmpxchgb constrains this to be in %al + "q" (newVal), // Needs to be byte-accessible + "r" (&lock_) + : "memory", "flags"); + return out; + } + + // Initialize this MSL. It is unnecessary to call this if you + // zero-initialize the MicroSpinLock. + void init() { + lock_ = FREE; + } + + bool try_lock() { + return cas(FREE, LOCKED); + } + + void lock() { + detail::Sleeper sleeper; + do { + while (lock_ != FREE) { + asm volatile("" : : : "memory"); + sleeper.wait(); + } + } while (!try_lock()); + DCHECK(lock_ == LOCKED); + } + + void unlock() { + CHECK(lock_ == LOCKED); + asm volatile("" : : : "memory"); + lock_ = FREE; // release barrier on x86 + } +}; + +////////////////////////////////////////////////////////////////////// + +/* + * Spin lock on a single bit in an integral type. You can use this + * with 16, 32, or 64-bit integral types. + * + * This is useful if you want a small lock and already have an int + * with a bit in it that you aren't using. But note that it can't be + * as small as MicroSpinLock (1 byte), if you don't already have a + * convenient int with an unused bit lying around to put it on. + * + * To construct these, either use init() or zero initialize. We don't + * have a real constructor because we want this to be a POD type so we + * can put it into packed structs. + */ +template +struct PicoSpinLock { + // Internally we deal with the unsigned version of the type. + typedef typename std::make_unsigned::type UIntType; + + static_assert(std::is_integral::value, + "PicoSpinLock needs an integral type"); + static_assert(sizeof(IntType) == 2 || sizeof(IntType) == 4 || + sizeof(IntType) == 8, + "PicoSpinLock can't work on integers smaller than 2 bytes"); + +public: + static const UIntType kLockBitMask_ = UIntType(1) << Bit; + UIntType lock_; + + /* + * You must call this function before using this class, if you + * default constructed it. If you zero-initialized it you can + * assume the PicoSpinLock is in a valid unlocked state with + * getData() == 0. + * + * (This doesn't use a constructor because we want to be a POD.) + */ + void init(IntType initialValue = 0) { + CHECK(!(initialValue & kLockBitMask_)); + lock_ = initialValue; + } + + /* + * Returns the value of the integer we using for our lock, except + * with the bit we are using as a lock cleared, regardless of + * whether the lock is held. + * + * It is 'safe' to call this without holding the lock. (As in: you + * get the same guarantees for simultaneous accesses to an integer + * as you normally get.) + */ + IntType getData() const { + return static_cast(lock_ & ~kLockBitMask_); + } + + /* + * Set the value of the other bits in our integer. + * + * Don't use this when you aren't holding the lock, unless it can be + * guaranteed that no other threads may be trying to use this. + */ + void setData(IntType w) { + CHECK(!(w & kLockBitMask_)); + lock_ = (lock_ & kLockBitMask_) | w; + } + + /* + * Try to get the lock without blocking: returns whether or not we + * got it. 
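+ *
+ * (Added note: roughly the C++11-atomics equivalent of the "lock bts"
+ * sequence below, for readers on other architectures --
+ *
+ *   // succeed iff the lock bit was previously clear
+ *   return (word.fetch_or(kLockBitMask_) & kLockBitMask_) == 0;
+ *
+ * -- though this class uses inline asm because lock_ is a plain
+ * integer, not a std::atomic, which keeps the struct a POD.)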
+ */ + bool try_lock() const { + bool ret = false; + +#define FB_DOBTS(size) \ + asm volatile("lock; bts" #size " %1, (%2); setnc %0" \ + : "=r" (ret) \ + : "i" (Bit), \ + "r" (&lock_) \ + : "memory", "flags") + + switch (sizeof(IntType)) { + case 2: FB_DOBTS(w); break; + case 4: FB_DOBTS(l); break; + case 8: FB_DOBTS(q); break; + } + +#undef FB_DOBTS + + return ret; + } + + /* + * Block until we can acquire the lock. Uses Sleeper to wait. + */ + void lock() const { + detail::Sleeper sleeper; + while (!try_lock()) { + sleeper.wait(); + } + } + + /* + * Release the lock, without changing the value of the rest of the + * integer. + */ + void unlock() const { +#define FB_DOBTR(size) \ + asm volatile("lock; btr" #size " %0, (%1)" \ + : \ + : "i" (Bit), \ + "r" (&lock_) \ + : "memory", "flags") + + + // Reads and writes can not be reordered wrt locked instructions, + // so we don't need a memory fence here. + switch (sizeof(IntType)) { + case 2: FB_DOBTR(w); break; + case 4: FB_DOBTR(l); break; + case 8: FB_DOBTR(q); break; + } + +#undef FB_DOBTR + } +}; + +////////////////////////////////////////////////////////////////////// + +typedef std::lock_guard MSLGuard; + +////////////////////////////////////////////////////////////////////// + +} + +#endif diff --git a/folly/StlAllocator.h b/folly/StlAllocator.h new file mode 100644 index 00000000..5cefb240 --- /dev/null +++ b/folly/StlAllocator.h @@ -0,0 +1,126 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_STLALLOCATOR_H_ +#define FOLLY_STLALLOCATOR_H_ + +#include + +namespace folly { + +/** + * Wrap a simple allocator into a STL-compliant allocator. + * + * The simple allocator must provide two methods: + * void* allocate(size_t size); + * void deallocate(void* ptr, size_t size); + * which, respectively, allocate a block of size bytes (aligned to the maximum + * alignment required on your system), throwing std::bad_alloc if the + * allocation can't be satisfied, and free a previously allocated block. 
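+ *
+ * (Added usage sketch, pairing StlAllocator with the MallocAllocator
+ * described below; myAllocator and vec are illustrative names:
+ *
+ *   MallocAllocator myAllocator;
+ *   std::vector<int, StlAllocator<MallocAllocator, int>> vec(
+ *       (StlAllocator<MallocAllocator, int>(&myAllocator)));
+ *   vec.push_back(42);   // allocates through myAllocator
+ *
+ * The allocator object must outlive the container; StlAllocator holds
+ * only a raw pointer to it.)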
+ * + * Note that the following allocator resembles the standard allocator + * quite well: + * + * class MallocAllocator { + * public: + * void* allocate(size_t size) { + * void* p = malloc(size); + * if (!p) throw std::bad_alloc(); + * return p; + * } + * void deallocate(void* p) { + * free(p); + * } + * }; + */ + +// This would be so much simpler with std::allocator_traits, but gcc 4.6.2 +// doesn't support it +template class StlAllocator; + +template class StlAllocator { + public: + typedef void value_type; + typedef void* pointer; + typedef const void* const_pointer; + template struct rebind { + typedef StlAllocator other; + }; +}; + +template +class StlAllocator { + public: + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + + typedef ptrdiff_t difference_type; + typedef size_t size_type; + + StlAllocator() : alloc_(nullptr) { } + explicit StlAllocator(Alloc* alloc) : alloc_(alloc) { } + + template StlAllocator(const StlAllocator& other) + : alloc_(other.alloc()) { } + + T* allocate(size_t n, const void* hint = nullptr) { + return static_cast(alloc_->allocate(n * sizeof(T))); + } + + void deallocate(T* p, size_t n) { + alloc_->deallocate(p); + } + + size_t max_size() const { + return std::numeric_limits::max(); + } + + T* address(T& x) const { + return std::addressof(x); + } + + const T* address(const T& x) const { + return std::addressof(x); + } + + template + void construct(T* p, Args&&... args) { + new (p) T(std::forward(args)...); + } + + void destroy(T* p) { + p->~T(); + } + + Alloc* alloc() const { + return alloc_; + } + + template struct rebind { + typedef StlAllocator other; + }; + + private: + Alloc* alloc_; +}; + +} // namespace folly + +#endif /* FOLLY_STLALLOCATOR_H_ */ + diff --git a/folly/String-inl.h b/folly/String-inl.h new file mode 100644 index 00000000..af4c4dcf --- /dev/null +++ b/folly/String-inl.h @@ -0,0 +1,323 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_STRING_INL_H_ +#define FOLLY_STRING_INL_H_ + +#include + +#ifndef FOLLY_BASE_STRING_H_ +#error This file may only be included from String.h +#endif + +namespace folly { + +namespace detail { +// Map from character code to value of one-character escape sequence +// ('\n' = 10 maps to 'n'), 'O' if the character should be printed as +// an octal escape sequence, or 'P' if the character is printable and +// should be printed as is. +extern const char cEscapeTable[]; +} // namespace detail + +template +void cEscape(StringPiece str, String& out) { + char esc[4]; + esc[0] = '\\'; + out.reserve(out.size() + str.size()); + auto p = str.begin(); + auto last = p; // last regular character + // We advance over runs of regular characters (printable, not double-quote or + // backslash) and copy them in one go; this is faster than calling push_back + // repeatedly. 
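+  // (For example, escaping "ab\ncd" appends the run "ab" in one go,
+  // then the two-character escape \n, then the run "cd".)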
+ while (p != str.end()) { + char c = *p; + unsigned char v = static_cast(c); + char e = detail::cEscapeTable[v]; + if (e == 'P') { // printable + ++p; + } else if (e == 'O') { // octal + out.append(&*last, p - last); + esc[1] = '0' + ((v >> 6) & 7); + esc[2] = '0' + ((v >> 3) & 7); + esc[3] = '0' + (v & 7); + out.append(esc, 4); + ++p; + last = p; + } else { // special 1-character escape + out.append(&*last, p - last); + esc[1] = e; + out.append(esc, 2); + ++p; + last = p; + } + } + out.append(&*last, p - last); +} + +namespace detail { +// Map from the character code of the character following a backslash to +// the unescaped character if a valid one-character escape sequence +// ('n' maps to 10 = '\n'), 'O' if this is the first character of an +// octal escape sequence, 'X' if this is the first character of a +// hexadecimal escape sequence, or 'I' if this escape sequence is invalid. +extern const char cUnescapeTable[]; + +// Map from the character code to the hex value, or 16 if invalid hex char. +extern const unsigned char hexTable[]; +} // namespace detail + +template +void cUnescape(StringPiece str, String& out, bool strict) { + out.reserve(out.size() + str.size()); + auto p = str.begin(); + auto last = p; // last regular character (not part of an escape sequence) + // We advance over runs of regular characters (not backslash) and copy them + // in one go; this is faster than calling push_back repeatedly. + while (p != str.end()) { + char c = *p; + if (c != '\\') { // normal case + ++p; + continue; + } + out.append(&*last, p - last); + if (p == str.end()) { // backslash at end of string + if (strict) { + throw std::invalid_argument("incomplete escape sequence"); + } + out.push_back('\\'); + last = p; + continue; + } + ++p; + char e = detail::cUnescapeTable[static_cast(*p)]; + if (e == 'O') { // octal + unsigned char val = 0; + for (int i = 0; i < 3 && p != str.end() && *p >= '0' && *p <= '7'; + ++i, ++p) { + val = (val << 3) | (*p - '0'); + } + out.push_back(val); + last = p; + } else if (e == 'X') { // hex + ++p; + if (p == str.end()) { // \x at end of string + if (strict) { + throw std::invalid_argument("incomplete hex escape sequence"); + } + out.append("\\x"); + last = p; + continue; + } + unsigned char val = 0; + unsigned char h; + for (; (p != str.end() && + (h = detail::hexTable[static_cast(*p)]) < 16); + ++p) { + val = (val << 4) | h; + } + out.push_back(val); + last = p; + } else if (e == 'I') { // invalid + if (strict) { + throw std::invalid_argument("invalid escape sequence"); + } + out.push_back('\\'); + out.push_back(*p); + ++p; + last = p; + } else { // standard escape sequence, \' etc + out.push_back(e); + ++p; + last = p; + } + } + out.append(&*last, p - last); +} + +namespace detail { + +/* + * The following functions are type-overloaded helpers for + * internalSplit(). + */ +inline size_t delimSize(char) { return 1; } +inline size_t delimSize(StringPiece s) { return s.size(); } +inline bool atDelim(const char* s, char c) { + return *s == c; +} +inline bool atDelim(const char* s, StringPiece sp) { + return !std::memcmp(s, sp.start(), sp.size()); +} + +// These are used to short-circuit internalSplit() in the case of +// 1-character strings. +inline char delimFront(char c) { + // This one exists only for compile-time; it should never be called. 
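+  // (internalSplit() calls delimFront(delim) on a branch that is dead
+  // when the delimiter is already a char, but the compiler still needs
+  // a char overload to instantiate that call; hence the unconditional
+  // abort.)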
+ std::abort(); + return c; +} +inline char delimFront(StringPiece s) { + assert(!s.empty() && s.start() != nullptr); + return *s.start(); +} + +/* + * These output conversion templates allow us to support multiple + * output string types, even when we are using an arbitrary + * OutputIterator. + */ +template struct OutputConverter {}; + +template<> struct OutputConverter { + std::string operator()(StringPiece sp) const { + return sp.toString(); + } +}; + +template<> struct OutputConverter { + fbstring operator()(StringPiece sp) const { + return sp.toFbstring(); + } +}; + +template<> struct OutputConverter { + StringPiece operator()(StringPiece sp) const { return sp; } +}; + +/* + * Shared implementation for all the split() overloads. + * + * This uses some external helpers that are overloaded to let this + * algorithm be more performant if the deliminator is a single + * character instead of a whole string. + * + * @param ignoreEmpty iff true, don't copy empty segments to output + */ +template +void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, + bool ignoreEmpty) { + assert(sp.start() != nullptr); + + const char* s = sp.start(); + const size_t strSize = sp.size(); + const size_t dSize = delimSize(delim); + + OutputConverter conv; + + if (dSize > strSize || dSize == 0) { + if (!ignoreEmpty || strSize > 0) { + *out++ = conv(sp); + } + return; + } + if (boost::is_same::value && dSize == 1) { + // Call the char version because it is significantly faster. + return internalSplit(delimFront(delim), sp, out, + ignoreEmpty); + } + + int tokenStartPos = 0; + int tokenSize = 0; + for (int i = 0; i <= strSize - dSize; ++i) { + if (atDelim(&s[i], delim)) { + if (!ignoreEmpty || tokenSize > 0) { + *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize)); + } + + tokenStartPos = i + dSize; + tokenSize = 0; + i += dSize - 1; + } else { + ++tokenSize; + } + } + + if (!ignoreEmpty || tokenSize > 0) { + tokenSize = strSize - tokenStartPos; + *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize)); + } +} + +template StringPiece prepareDelim(const String& s) { + return StringPiece(s); +} +inline char prepareDelim(char c) { return c; } + +} + +////////////////////////////////////////////////////////////////////// + +template +void split(const Delim& delimiter, + const String& input, + std::vector& out, + bool ignoreEmpty) { + detail::internalSplit( + detail::prepareDelim(delimiter), + StringPiece(input), + std::back_inserter(out), + ignoreEmpty); +} + +template +void split(const Delim& delimiter, + const String& input, + fbvector& out, + bool ignoreEmpty = false) { + detail::internalSplit( + detail::prepareDelim(delimiter), + StringPiece(input), + std::back_inserter(out), + ignoreEmpty); +} + +template +void splitTo(const Delim& delimiter, + const String& input, + OutputIterator out, + bool ignoreEmpty) { + detail::internalSplit( + detail::prepareDelim(delimiter), + StringPiece(input), + out, + ignoreEmpty); +} + +namespace detail { +/** + * Hex-dump at most 16 bytes starting at offset from a memory area of size + * bytes. Return the number of bytes actually dumped. 
+ */
+size_t hexDumpLine(const void* ptr, size_t offset, size_t size,
+                   std::string& line);
+}  // namespace detail
+
+template <class OutIt>
+void hexDump(const void* ptr, size_t size, OutIt out) {
+  size_t offset = 0;
+  std::string line;
+  while (offset < size) {
+    offset += detail::hexDumpLine(ptr, offset, size, line);
+    *out++ = line;
+  }
+}
+
+}  // namespace folly
+
+#endif /* FOLLY_STRING_INL_H_ */
+
diff --git a/folly/String.cpp b/folly/String.cpp
new file mode 100644
index 00000000..e58432b4
--- /dev/null
+++ b/folly/String.cpp
@@ -0,0 +1,313 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/String.h"
+#include "folly/Format.h"
+
+#include <cerrno>
+#include <cstdarg>
+#include <cstring>
+#include <cmath>
+#include <iterator>
+#include <sstream>
+
+#undef FOLLY_DEMANGLE
+#if defined(__GNUG__) && __GNUG__ >= 4
+# include <cxxabi.h>
+# define FOLLY_DEMANGLE 1
+#endif
+
+namespace folly {
+
+namespace {
+
+inline void stringPrintfImpl(std::string& output, const char* format,
+                             va_list args) {
+  // Try to use the space at the end of output for our output buffer.
+  // Find the write point, then inflate the string's size temporarily
+  // to its capacity; we will later shrink it to the size needed to
+  // represent the formatted string.  If this buffer isn't large
+  // enough, we do a resize and try again.
+
+  const auto write_point = output.size();
+  auto remaining = output.capacity() - write_point;
+  output.resize(output.capacity());
+
+  va_list args_copy;
+  va_copy(args_copy, args);
+  int bytes_used = vsnprintf(&output[write_point], remaining, format,
+                             args_copy);
+  va_end(args_copy);
+  if (bytes_used < 0) {
+    throw std::runtime_error(
+      to<std::string>("Invalid format string; snprintf returned negative "
+                      "with format string: ", format));
+  } else if (bytes_used < remaining) {
+    // There was enough room, just shrink and return.
+    output.resize(write_point + bytes_used);
+  } else {
+    output.resize(write_point + bytes_used + 1);
+    remaining = bytes_used + 1;
+    va_list args_copy;
+    va_copy(args_copy, args);
+    bytes_used = vsnprintf(&output[write_point], remaining, format,
+                           args_copy);
+    va_end(args_copy);
+    if (bytes_used + 1 != remaining) {
+      throw std::runtime_error(
+        to<std::string>("vsnprintf retry did not manage to work "
+                        "with format string: ", format));
+    }
+    output.resize(write_point + bytes_used);
+  }
+}
+
+}  // anon namespace
+
+std::string stringPrintf(const char* format, ...) {
+  // snprintf will tell us how large the output buffer should be, but
+  // we then have to call it a second time, which is costly.  By
+  // guesstimating the final size, we avoid the double snprintf in many
+  // cases, resulting in a performance win.  We use this constructor
+  // of std::string to avoid a double allocation, though it does pad
+  // the resulting string with nul bytes.  Our guesstimate is twice
+  // the format string size, or 32 bytes, whichever is larger.  This
+  // is a heuristic that doesn't affect correctness but attempts to be
+  // reasonably fast for the most common cases.
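+  //
+  // (Added usage example:
+  //   std::string s = stringPrintf("%d bottles of %s", 99, "beer");
+  //   stringAppendf(&s, " on the %s", "wall");
+  //   // s == "99 bottles of beer on the wall")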
+ std::string ret(std::max(32UL, strlen(format) * 2), '\0'); + ret.resize(0); + + va_list ap; + va_start(ap, format); + stringPrintfImpl(ret, format, ap); + va_end(ap); + return ret; +} + +// Basic declarations; allow for parameters of strings and string +// pieces to be specified. +std::string& stringAppendf(std::string* output, const char* format, ...) { + va_list ap; + va_start(ap, format); + stringPrintfImpl(*output, format, ap); + va_end(ap); + return *output; +} + +void stringPrintf(std::string* output, const char* format, ...) { + output->clear(); + va_list ap; + va_start(ap, format); + stringPrintfImpl(*output, format, ap); + va_end(ap); +}; + +namespace { + +struct PrettySuffix { + const char* suffix; + double val; +}; + +const PrettySuffix kPrettyTimeSuffixes[] = { + { "s ", 1e0L }, + { "ms", 1e-3L }, + { "us", 1e-6L }, + { "ns", 1e-9L }, + { "ps", 1e-12L }, + { "s ", 0 }, + { 0, 0 }, +}; + +const PrettySuffix kPrettyBytesSuffixes[] = { + { "TB", int64_t(1) << 40 }, + { "GB", int64_t(1) << 30 }, + { "MB", int64_t(1) << 20 }, + { "kB", int64_t(1) << 10 }, + { "B ", 0L }, + { 0, 0 }, +}; + +const PrettySuffix kPrettyBytesMetricSuffixes[] = { + { "TB", 1e12L }, + { "GB", 1e9L }, + { "MB", 1e6L }, + { "kB", 1e3L }, + { "B ", 0L }, + { 0, 0 }, +}; + +const PrettySuffix kPrettyUnitsMetricSuffixes[] = { + { "tril", 1e12L }, + { "bil", 1e9L }, + { "M", 1e6L }, + { "k", 1e3L }, + { " ", 0 }, + { 0, 0 }, +}; + +const PrettySuffix kPrettyUnitsBinarySuffixes[] = { + { "T", int64_t(1) << 40 }, + { "G", int64_t(1) << 30 }, + { "M", int64_t(1) << 20 }, + { "k", int64_t(1) << 10 }, + { " ", 0 }, + { 0, 0 }, +}; + +const PrettySuffix* const kPrettySuffixes[PRETTY_NUM_TYPES] = { + kPrettyTimeSuffixes, + kPrettyBytesSuffixes, + kPrettyBytesMetricSuffixes, + kPrettyUnitsMetricSuffixes, + kPrettyUnitsBinarySuffixes, +}; + +} // namespace + +std::string prettyPrint(double val, PrettyType type, bool addSpace) { + char buf[100]; + + // pick the suffixes to use + assert(type >= 0); + assert(type < PRETTY_NUM_TYPES); + const PrettySuffix* suffixes = kPrettySuffixes[type]; + + // find the first suffix we're bigger than -- then use it + double abs_val = fabs(val); + for (int i = 0; suffixes[i].suffix; ++i) { + if (abs_val >= suffixes[i].val) { + snprintf(buf, sizeof buf, "%.4g%s%s", + (suffixes[i].val ? (val / suffixes[i].val) + : val), + (addSpace ? " " : ""), + suffixes[i].suffix); + return std::string(buf); + } + } + + // no suffix, we've got a tiny value -- just print it in sci-notation + snprintf(buf, sizeof buf, "%.4g", val); + return std::string(buf); +} + +std::string hexDump(const void* ptr, size_t size) { + std::ostringstream os; + hexDump(ptr, size, std::ostream_iterator(os, "\n")); + return os.str(); +} + +fbstring errnoStr(int err) { + int savedErrno = errno; + + // Ensure that we reset errno upon exit. 
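+  // (Anything below -- strerror_r(), or allocation inside to<>() --
+  // can overwrite errno; the guard restores it, which is what makes
+  // errnoStr(errno) safe to call.)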
+ auto guard(makeGuard([&] { errno = savedErrno; })); + + char buf[1024]; + buf[0] = '\0'; + + fbstring result; + + // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html +#if (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || \ + !FOLLY_HAVE_FEATURES_H) && !_GNU_SOURCE + // Using XSI-compatible strerror_r + int r = strerror_r(err, buf, sizeof(buf)); + + if (r == -1) { + result = to( + "Unknown error ", err, + " (strerror_r failed with error ", errno, ")"); + } else { + result.assign(buf); + } +#else + // Using GNU strerror_r + result.assign(strerror_r(err, buf, sizeof(buf))); +#endif + + return result; +} + +#ifdef FOLLY_DEMANGLE + +fbstring demangle(const char* name) { + int status; + size_t len = 0; + // malloc() memory for the demangled type name + char* demangled = abi::__cxa_demangle(name, nullptr, &len, &status); + if (status != 0) { + return name; + } + // len is the length of the buffer (including NUL terminator and maybe + // other junk) + return fbstring(demangled, strlen(demangled), len, AcquireMallocatedString()); +} + +#else + +fbstring demangle(const char* name) { + return name; +} + +#endif +#undef FOLLY_DEMANGLE + +namespace detail { + +size_t hexDumpLine(const void* ptr, size_t offset, size_t size, + std::string& line) { + // Line layout: + // 8: address + // 1: space + // (1+2)*16: hex bytes, each preceded by a space + // 1: space separating the two halves + // 3: " |" + // 16: characters + // 1: "|" + // Total: 78 + line.clear(); + line.reserve(78); + const uint8_t* p = reinterpret_cast(ptr) + offset; + size_t n = std::min(size - offset, size_t(16)); + format("{:08x} ", offset).appendTo(line); + + for (size_t i = 0; i < n; i++) { + if (i == 8) { + line.push_back(' '); + } + format(" {:02x}", p[i]).appendTo(line); + } + + // 3 spaces for each byte we're not printing, one separating the halves + // if necessary + line.append(3 * (16 - n) + (n <= 8), ' '); + line.append(" |"); + + for (size_t i = 0; i < n; i++) { + char c = (p[i] >= 32 && p[i] <= 126 ? static_cast(p[i]) : '.'); + line.push_back(c); + } + line.append(16 - n, ' '); + line.push_back('|'); + DCHECK_EQ(line.size(), 78); + + return n; +} + +} // namespace detail + +} // namespace folly diff --git a/folly/String.h b/folly/String.h new file mode 100644 index 00000000..57dfe876 --- /dev/null +++ b/folly/String.h @@ -0,0 +1,292 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FOLLY_BASE_STRING_H_ +#define FOLLY_BASE_STRING_H_ + +#include +#include + +#ifdef __GNUC__ +# include +# include +#endif + +#include "folly/Conv.h" +#include "folly/FBString.h" +#include "folly/FBVector.h" +#include "folly/Range.h" +#include "folly/ScopeGuard.h" + +// Compatibility function, to make sure toStdString(s) can be called +// to convert a std::string or fbstring variable s into type std::string +// with very little overhead if s was already std::string +namespace folly { + +inline +std::string toStdString(const folly::fbstring& s) { + return std::string(s.data(), s.size()); +} + +inline +const std::string& toStdString(const std::string& s) { + return s; +} + +// If called with a temporary, the compiler will select this overload instead +// of the above, so we don't return a (lvalue) reference to a temporary. +inline +std::string&& toStdString(std::string&& s) { + return std::move(s); +} + +/** + * C-Escape a string, making it suitable for representation as a C string + * literal. Appends the result to the output string. + * + * Backslashes all occurrences of backslash and double-quote: + * " -> \" + * \ -> \\ + * + * Replaces all non-printable ASCII characters with backslash-octal + * representation: + * -> \376 + * + * Note that we use backslash-octal instead of backslash-hex because the octal + * representation is guaranteed to consume no more than 3 characters; "\3760" + * represents two characters, one with value 254, and one with value 48 ('0'), + * whereas "\xfe0" represents only one character (with value 4064, which leads + * to implementation-defined behavior). + */ +template +void cEscape(StringPiece str, String& out); + +/** + * Similar to cEscape above, but returns the escaped string. + */ +template +String cEscape(StringPiece str) { + String out; + cEscape(str, out); + return out; +} + +/** + * C-Unescape a string; the opposite of cEscape above. Appends the result + * to the output string. + * + * Recognizes the standard C escape sequences: + * + * \' \" \? \\ \a \b \f \n \r \t \v + * \[0-7]+ + * \x[0-9a-fA-F]+ + * + * In strict mode (default), throws std::invalid_argument if it encounters + * an unrecognized escape sequence. In non-strict mode, it leaves + * the escape sequence unchanged. + */ +template +void cUnescape(StringPiece str, String& out, bool strict = true); + +/** + * Similar to cUnescape above, but returns the escaped string. + */ +template +String cUnescape(StringPiece str, bool strict = true) { + String out; + cUnescape(str, out, strict); + return out; +} + +/** + * stringPrintf is much like printf but deposits its result into a + * string. Two signatures are supported: the first simply returns the + * resulting string, and the second appends the produced characters to + * the specified string and returns a reference to it. + */ +std::string stringPrintf(const char* format, ...) + __attribute__ ((format (printf, 1, 2))); + +/** Similar to stringPrintf, with different signiture. + */ +void stringPrintf(std::string* out, const char* fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +std::string& stringAppendf(std::string* output, const char* format, ...) + __attribute__ ((format (printf, 2, 3))); + +/* + * A pretty-printer for numbers that appends suffixes of units of the + * given type. It prints 4 sig-figs of value with the most + * appropriate unit. + * + * If `addSpace' is true, we put a space between the units suffix and + * the value. + * + * Current types are: + * PRETTY_TIME - s, ms, us, ns, etc. 
+ * PRETTY_BYTES - kB, MB, GB, etc (goes up by 2^10 = 1024 each time)
+ * PRETTY_BYTES_METRIC - kB, MB, GB, etc (goes up by 10^3 = 1000 each time)
+ * PRETTY_UNITS_METRIC - k, M, G, etc (goes up by 10^3 = 1000 each time)
+ * PRETTY_UNITS_BINARY - k, M, G, etc (goes up by 2^10 = 1024 each time)
+ *
+ * @author Mark Rabkin
+ */
+enum PrettyType {
+  PRETTY_TIME,
+  PRETTY_BYTES,
+  PRETTY_BYTES_METRIC,
+  PRETTY_UNITS_METRIC,
+  PRETTY_UNITS_BINARY,
+
+  PRETTY_NUM_TYPES
+};
+
+std::string prettyPrint(double val, PrettyType, bool addSpace = true);
+
+/**
+ * Write a hex dump of size bytes starting at ptr to out.
+ *
+ * The hex dump is formatted as follows: for the string
+ * "abcdefghijklmnopqrstuvwxyz\x02"
+ *
+ *   00000000   61 62 63 64 65 66 67 68  69 6a 6b 6c 6d 6e 6f 70  |abcdefghijklmnop|
+ *   00000010   71 72 73 74 75 76 77 78  79 7a 02                 |qrstuvwxyz.     |
+ *
+ * that is, we write 16 bytes per line, both as hex bytes and as printable
+ * characters.  Non-printable characters are replaced with '.'.
+ * Lines are written to out one by one (one StringPiece at a time) without
+ * delimiters.
+ */
+template <class OutIt>
+void hexDump(const void* ptr, size_t size, OutIt out);
+
+/**
+ * Return the hex dump of size bytes starting at ptr as a string.
+ */
+std::string hexDump(const void* ptr, size_t size);
+
+/**
+ * Return a fbstring containing the description of the given errno value.
+ * Takes care not to overwrite the actual system errno, so calling
+ * errnoStr(errno) is valid.
+ */
+fbstring errnoStr(int err);
+
+/**
+ * Return the demangled (prettified) version of a C++ type.
+ *
+ * This function tries to produce a human-readable type, but the type name
+ * will be returned unchanged in case of error or if demangling isn't
+ * supported on your system.
+ *
+ * Use for debugging -- do not rely on demangle() returning anything useful.
+ *
+ * This function may allocate memory (and therefore throw).
+ */
+fbstring demangle(const char* name);
+inline fbstring demangle(const std::type_info& type) {
+  return demangle(type.name());
+}
+
+/**
+ * Debug string for an exception: include type and what().
+ */
+inline fbstring exceptionStr(const std::exception& e) {
+  return folly::to<fbstring>(demangle(typeid(e)), ": ", e.what());
+}
+
+/*
+ * Split a string into a list of tokens by delimiter.
+ *
+ * The split interface here supports different output types, selected
+ * at compile time: StringPiece, fbstring, or std::string.  If you are
+ * using a vector to hold the output, it detects the type based on
+ * what your vector contains.
+ *
+ * You can also use splitTo() to write the output to an arbitrary
+ * OutputIterator (e.g. std::inserter() on a std::set<>), in which
+ * case you have to tell the function the type.  (Rationale:
+ * OutputIterators don't have a value_type, so we can't detect the
+ * type in split without being told.)
+ *
+ * Examples:
+ *
+ *   std::vector<std::string> v;
+ *   folly::split(":", "asd:bsd", v);
+ *
+ *   std::set<std::string> s;
+ *   folly::splitTo<std::string>(":", "asd:bsd:asd:csd",
+ *                               std::inserter(s, s.begin()));
+ *
+ * Split also takes a flag (ignoreEmpty) that indicates whether adjacent
+ * tokens should be treated as one separator or not.  Note that, unlike
+ * strtok(), the default is to treat them as separators.
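+ *
+ * (Added example of the flag:
+ *
+ *   std::vector<std::string> v;
+ *   folly::split(":", "a::c", v);         // v is {"a", "", "c"}
+ *   v.clear();
+ *   folly::split(":", "a::c", v, true);   // ignoreEmpty: {"a", "c"}
+ * )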
+ */ + +template +void split(const Delim& delimiter, + const String& input, + std::vector& out, + bool ignoreEmpty = false); + +template +void split(const Delim& delimiter, + const String& input, + folly::fbvector& out, + bool ignoreEmpty = false); + +template +void splitTo(const Delim& delimiter, + const String& input, + OutputIterator out, + bool ignoreEmpty = false); + +} // namespace folly + +// Hash functions for string and fbstring usable with e.g. hash_map +#ifdef __GNUC__ +namespace __gnu_cxx { + +template +struct hash > : private hash { + size_t operator()(const folly::basic_fbstring & s) const { + return hash::operator()(s.c_str()); + } +}; + +template +struct hash > : private hash { + size_t operator()(const std::basic_string & s) const { + return hash::operator()(s.c_str()); + } +}; + +} // namespace __gnu_cxx +#endif + +// Hook into boost's type traits +namespace boost { +template +struct has_nothrow_constructor > : true_type { + enum { value = true }; +}; +} // namespace boost + +#include "folly/String-inl.h" + +#endif diff --git a/folly/Synchronized.h b/folly/Synchronized.h new file mode 100644 index 00000000..ad365b88 --- /dev/null +++ b/folly/Synchronized.h @@ -0,0 +1,653 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This module implements a Synchronized abstraction useful in + * mutex-based concurrency. + * + * @author: Andrei Alexandrescu (andrei.alexandrescu@fb.com) + */ + +#ifndef SYNCHRONIZED_H_ +#define SYNCHRONIZED_H_ + +#include +#include +#include +#include "folly/Preprocessor.h" +#include "folly/Traits.h" + +namespace folly { + +namespace detail { +enum InternalDoNotUse {}; + +/** + * Free function adaptors for std:: and boost:: + */ + +/** + * Yields true iff T has .lock() and .unlock() member functions. This + * is done by simply enumerating the mutexes with this interface in + * std and boost. + */ +template +struct HasLockUnlock { + enum { value = IsOneOf::value }; +}; + +/** + * Acquires a mutex for reading by calling .lock(). The exception is + * boost::shared_mutex, which has a special read-lock primitive called + * .lock_shared(). + */ +template +typename std::enable_if< + HasLockUnlock::value && !std::is_same::value>::type +acquireRead(T& mutex) { + mutex.lock(); +} + +/** + * Special case for boost::shared_mutex. + */ +template +typename std::enable_if::value>::type +acquireRead(T& mutex) { + mutex.lock_shared(); +} + +/** + * Acquires a mutex for reading with timeout by calling .timed_lock(). This + * applies to three of the boost mutex classes as enumerated below. + */ +template +typename std::enable_if::value, bool>::type +acquireRead(T& mutex, + unsigned int milliseconds) { + return mutex.timed_lock_shared(boost::posix_time::milliseconds(milliseconds)); +} + +/** + * Acquires a mutex for reading and writing by calling .lock(). 
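+ *
+ * (This is the dispatch pattern used for every primitive in this
+ * namespace, spelled out -- IsOneOf comes from folly/Traits.h, and
+ * enable_if drops the overload for any mutex type not in its list:
+ *
+ *   template <class T>
+ *   typename std::enable_if<HasLockUnlock<T>::value>::type
+ *   acquireReadWrite(T& mutex) {
+ *     mutex.lock();
+ *   }
+ * )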
+ */ +template +typename std::enable_if::value>::type +acquireReadWrite(T& mutex) { + mutex.lock(); +} + +/** + * Acquires a mutex for reading and writing with timeout by calling + * .try_lock_for(). This applies to two of the std mutex classes as + * enumerated below. + */ +template +typename std::enable_if< + IsOneOf::value, bool>::type +acquireReadWrite(T& mutex, + unsigned int milliseconds) { + return mutex.try_lock_for(std::chrono::milliseconds(milliseconds)); +} + +/** + * Acquires a mutex for reading and writing with timeout by calling + * .timed_lock(). This applies to three of the boost mutex classes as + * enumerated below. + */ +template +typename std::enable_if< + IsOneOf::value, bool>::type +acquireReadWrite(T& mutex, + unsigned int milliseconds) { + return mutex.timed_lock(boost::posix_time::milliseconds(milliseconds)); +} + +/** + * Releases a mutex previously acquired for reading by calling + * .unlock(). The exception is boost::shared_mutex, which has a + * special primitive called .unlock_shared(). + */ +template +typename std::enable_if< + HasLockUnlock::value && !std::is_same::value>::type +releaseRead(T& mutex) { + mutex.unlock(); +} + +/** + * Special case for boost::shared_mutex. + */ +template +typename std::enable_if::value>::type +releaseRead(T& mutex) { + mutex.unlock_shared(); +} + +/** + * Releases a mutex previously acquired for reading-writing by calling + * .unlock(). + */ +template +typename std::enable_if::value>::type +releaseReadWrite(T& mutex) { + mutex.unlock(); +} + +} // namespace detail + +/** + * Synchronized encapsulates an object of type T (a "datum") paired + * with a mutex. The only way to access the datum is while the mutex + * is locked, and Synchronized makes it virtually impossible to do + * otherwise. The code that would access the datum in unsafe ways + * would look odd and convoluted, thus readily alerting the human + * reviewer. In contrast, the code that uses Synchronized correctly + * looks simple and intuitive. + * + * The second parameter must be a mutex type. Supported mutexes are + * std::mutex, std::recursive_mutex, std::timed_mutex, + * std::recursive_timed_mutex, boost::mutex, boost::recursive_mutex, + * boost::shared_mutex, boost::timed_mutex, + * boost::recursive_timed_mutex, and the folly/RWSpinLock.h + * classes. + * + * You may define Synchronized support by defining 4-6 primitives in + * the same namespace as the mutex class (found via ADL). The + * primitives are: acquireRead, acquireReadWrite, releaseRead, and + * releaseReadWrite. Two optional primitives for timout operations are + * overloads of acquireRead and acquireReadWrite. For signatures, + * refer to the namespace detail below, which implements the + * primitives for mutexes in std and boost. + */ +template +struct Synchronized { + /** + * Default constructor leaves both members call their own default + * constructor. + */ + Synchronized() = default; + + /** + * Copy constructor copies the data (with locking the source and + * all) but does NOT copy the mutex. Doing so would result in + * deadlocks. + */ + Synchronized(const Synchronized& rhs) { + auto guard = rhs.operator->(); + datum_ = rhs.datum_; + } + + /** + * Move constructor moves the data (with locking the source and all) + * but does not move the mutex. + */ + Synchronized(Synchronized&& rhs) { + auto guard = rhs.operator->(); + datum_ = std::move(rhs.datum_); + } + + /** + * Constructor taking a datum as argument copies it. There is no + * need to lock the constructing object. 
+ */ + explicit Synchronized(const T& rhs) : datum_(rhs) {} + + /** + * Constructor taking a datum rvalue as argument moves it. Again, + * there is no need to lock the constructing object. + */ + explicit Synchronized(T && rhs) : datum_(std::move(rhs)) {} + + /** + * The canonical assignment operator only assigns the data, NOT the + * mutex. It locks the two objects in ascending order of their + * addresses. + */ + Synchronized& operator=(const Synchronized& rhs) { + if (this < *rhs) { + auto guard1 = operator->(); + auto guard2 = rhs.operator->(); + datum_ = rhs.datum_; + } else { + auto guard1 = rhs.operator->(); + auto guard2 = operator->(); + datum_ = rhs.datum_; + } + return *this; + } + + /** + * Lock object, assign datum. + */ + Synchronized& operator=(const T& rhs) { + auto guard = operator->(); + datum_ = rhs; + return *this; + } + + /** + * A LockedPtr lp keeps a modifiable (i.e. non-const) + * Synchronized object locked for the duration of lp's + * existence. Because of this, you get to access the datum's methods + * directly by using lp->fun(). + */ + struct LockedPtr { + /** + * Found no reason to leave this hanging. + */ + LockedPtr() = delete; + + /** + * Takes a Synchronized and locks it. + */ + explicit LockedPtr(Synchronized* parent) : parent_(parent) { + acquire(); + } + + /** + * Takes a Synchronized and attempts to lock it for some + * milliseconds. If not, the LockedPtr will be subsequently null. + */ + LockedPtr(Synchronized* parent, unsigned int milliseconds) { + using namespace detail; + if (acquireReadWrite(parent->mutex_, milliseconds)) { + parent_ = parent; + return; + } + // Could not acquire the resource, pointer is null + parent_ = NULL; + } + + /** + * This is used ONLY inside SYNCHRONIZED_DUAL. It initializes + * everything properly, but does not lock the parent because it + * "knows" someone else will lock it. Please do not use. + */ + LockedPtr(Synchronized* parent, detail::InternalDoNotUse) + : parent_(parent) { + } + + /** + * Copy ctor adds one lock. + */ + LockedPtr(const LockedPtr& rhs) : parent_(rhs.parent_) { + acquire(); + } + + /** + * Assigning from another LockedPtr results in freeing the former + * lock and acquiring the new one. The method works with + * self-assignment (does nothing). + */ + LockedPtr& operator=(const LockedPtr& rhs) { + if (parent_ != rhs.parent_) { + if (parent_) parent_->mutex_.unlock(); + parent_ = rhs.parent_; + acquire(); + } + return *this; + } + + /** + * Destructor releases. + */ + ~LockedPtr() { + using namespace detail; + if (parent_) releaseReadWrite(parent_->mutex_); + } + + /** + * Safe to access the data. Don't save the obtained pointer by + * invoking lp.operator->() by hand. Also, if the method returns a + * handle stored inside the datum, don't use this idiom - use + * SYNCHRONIZED below. + */ + T* operator->() { + return parent_ ? &parent_->datum_ : NULL; + } + + /** + * This class temporarily unlocks a LockedPtr in a scoped + * manner. It is used inside of the UNSYNCHRONIZED macro. 
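+ *
+ * (Added usage sketch; v and u are illustrative names:
+ *
+ *   SYNCHRONIZED (v, vec) {
+ *     ...                      // mutex held
+ *     UNSYNCHRONIZED (u) {
+ *       ...                    // mutex released in this block
+ *     }                        // and re-acquired here
+ *   }
+ * )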
+ */ + struct Unsynchronizer { + explicit Unsynchronizer(LockedPtr* p) : parent_(p) { + using namespace detail; + releaseReadWrite(parent_->parent_->mutex_); + } + Unsynchronizer(const Unsynchronizer&) = delete; + Unsynchronizer& operator=(const Unsynchronizer&) = delete; + ~Unsynchronizer() { + parent_->acquire(); + } + LockedPtr* operator->() const { + return parent_; + } + private: + LockedPtr* parent_; + }; + friend struct Unsynchronizer; + Unsynchronizer typeHackDoNotUse(); + + template + friend void lockInOrder(P1& p1, P2& p2); + + private: + void acquire() { + using namespace detail; + if (parent_) acquireReadWrite(parent_->mutex_); + } + + // This is the entire state of LockedPtr. + Synchronized* parent_; + }; + + /** + * ConstLockedPtr does exactly what LockedPtr does, but for const + * Synchronized objects. Of interest is that ConstLockedPtr only + * uses a read lock, which is faster but more restrictive - you only + * get to call const methods of the datum. + * + * Much of the code between LockedPtr and + * ConstLockedPtr is identical and could be factor out, but there + * are enough nagging little differences to not justify the trouble. + */ + struct ConstLockedPtr { + ConstLockedPtr() = delete; + explicit ConstLockedPtr(const Synchronized* parent) : parent_(parent) { + acquire(); + } + ConstLockedPtr(const Synchronized* parent, detail::InternalDoNotUse) + : parent_(parent) { + } + ConstLockedPtr(const ConstLockedPtr& rhs) : parent_(rhs.parent_) { + acquire(); + } + explicit ConstLockedPtr(const LockedPtr& rhs) : parent_(rhs.parent_) { + acquire(); + } + ConstLockedPtr(const Synchronized* parent, unsigned int milliseconds) { + if (parent->mutex_.timed_lock( + boost::posix_time::milliseconds(milliseconds))) { + parent_ = parent; + return; + } + // Could not acquire the resource, pointer is null + parent_ = NULL; + } + + ConstLockedPtr& operator=(const ConstLockedPtr& rhs) { + if (parent_ != rhs.parent_) { + if (parent_) parent_->mutex_.unlock_shared(); + parent_ = rhs.parent_; + acquire(); + } + } + ~ConstLockedPtr() { + using namespace detail; + if (parent_) releaseRead(parent_->mutex_); + } + + const T* operator->() const { + return parent_ ? &parent_->datum_ : NULL; + } + + struct Unsynchronizer { + explicit Unsynchronizer(ConstLockedPtr* p) : parent_(p) { + using namespace detail; + releaseRead(parent_->parent_->mutex_); + } + Unsynchronizer(const Unsynchronizer&) = delete; + Unsynchronizer& operator=(const Unsynchronizer&) = delete; + ~Unsynchronizer() { + using namespace detail; + acquireRead(parent_->parent_->mutex_); + } + ConstLockedPtr* operator->() const { + return parent_; + } + private: + ConstLockedPtr* parent_; + }; + friend struct Unsynchronizer; + Unsynchronizer typeHackDoNotUse(); + + template + friend void lockInOrder(P1& p1, P2& p2); + + private: + void acquire() { + using namespace detail; + if (parent_) acquireRead(parent_->mutex_); + } + + const Synchronized* parent_; + }; + + /** + * This accessor offers a LockedPtr. In turn. LockedPtr offers + * operator-> returning a pointer to T. The operator-> keeps + * expanding until it reaches a pointer, so syncobj->foo() will lock + * the object and call foo() against it. + */ + LockedPtr operator->() { + return LockedPtr(this); + } + + /** + * Same, for constant objects. You will be able to invoke only const + * methods. + */ + ConstLockedPtr operator->() const { + return ConstLockedPtr(this); + } + + /** + * Attempts to acquire for a given number of milliseconds. 
If + * acquisition is unsuccessful, the returned LockedPtr is NULL. + */ + LockedPtr timedAcquire(unsigned int milliseconds) { + return LockedPtr(this, milliseconds); + } + + /** + * As above, for a constant object. + */ + ConstLockedPtr timedAcquire(unsigned int milliseconds) const { + return ConstLockedPtr(this, milliseconds); + } + + /** + * Used by SYNCHRONIZED_DUAL. + */ + LockedPtr internalDoNotUse() { + return LockedPtr(this, detail::InternalDoNotUse()); + } + + /** + * ditto + */ + ConstLockedPtr internalDoNotUse() const { + return ConstLockedPtr(this, detail::InternalDoNotUse()); + } + + /** + * Sometimes, although you have a mutable object, you only want to + * call a const method against it. The most efficient way to achieve + * that is by using a read lock. You get to do so by using + * obj.asConst()->method() instead of obj->method(). + */ + const Synchronized& asConst() const { + return *this; + } + + /** + * Swaps with another Synchronized. Protected against + * self-swap. Only data is swapped. Locks are acquired in increasing + * address order. + */ + void swap(Synchronized& rhs) { + if (this == &rhs) { + return; + } + if (this > &rhs) { + return rhs.swap(*this); + } + auto guard1 = operator->(); + auto guard2 = rhs.operator->(); + datum_.swap(rhs.datum_); + } + + /** + * Swap with another datum. Recommended because it keeps the mutex + * held only briefly. + */ + void swap(T& rhs) { + LockedPtr guard = operator->(); + datum_.swap(rhs); + } + + /** + * Copies datum to a given target. + */ + void copy(T* target) const { + ConstLockedPtr guard = operator->(); + *target = datum_; + } + + /** + * Returns a fresh copy of the datum. + */ + T copy() const { + ConstLockedPtr guard = operator->(); + return datum_; + } + +private: + T datum_; + mutable Mutex mutex_; +}; + +// Non-member swap primitive +template +void swap(Synchronized& lhs, Synchronized& rhs) { + lhs.swap(rhs); +} + +/** + * SYNCHRONIZED is the main facility that makes Synchronized + * helpful. It is a pseudo-statement that introduces a scope where the + * object is locked. Inside that scope you get to access the unadorned + * datum. + * + * Example: + * + * Synchronized> svector; + * ... + * SYNCHRONIZED (svector) { ... use svector as a vector ... } + * or + * SYNCHRONIZED (v, svector) { ... use v as a vector ... } + * + * Refer to folly/docs/Synchronized.md for a detailed explanation and more + * examples. + */ +#define SYNCHRONIZED(...) \ + if (bool SYNCHRONIZED_state = false) {} else \ + for (auto SYNCHRONIZED_lockedPtr = \ + (FB_ARG_2_OR_1(__VA_ARGS__)).operator->(); \ + !SYNCHRONIZED_state; SYNCHRONIZED_state = true) \ + for (auto& FB_ARG_1(__VA_ARGS__) = \ + *SYNCHRONIZED_lockedPtr.operator->(); \ + !SYNCHRONIZED_state; SYNCHRONIZED_state = true) + +#define TIMED_SYNCHRONIZED(timeout, ...) \ + if (bool SYNCHRONIZED_state = false) {} else \ + for (auto SYNCHRONIZED_lockedPtr = \ + (FB_ARG_2_OR_1(__VA_ARGS__)).timedAcquire(timeout); \ + !SYNCHRONIZED_state; SYNCHRONIZED_state = true) \ + for (auto FB_ARG_1(__VA_ARGS__) = \ + SYNCHRONIZED_lockedPtr.operator->(); \ + !SYNCHRONIZED_state; SYNCHRONIZED_state = true) + +/** + * Similar to SYNCHRONIZED, but only uses a read lock. + */ +#define SYNCHRONIZED_CONST(...) \ + SYNCHRONIZED(FB_ARG_1(__VA_ARGS__), \ + (FB_ARG_2_OR_1(__VA_ARGS__)).asConst()) + +/** + * Similar to TIMED_SYNCHRONIZED, but only uses a read lock. + */ +#define TIMED_SYNCHRONIZED_CONST(timeout, ...) 
+  TIMED_SYNCHRONIZED(timeout, FB_ARG_1(__VA_ARGS__), \
+                     (FB_ARG_2_OR_1(__VA_ARGS__)).asConst())
+
+/**
+ * Temporarily disables synchronization inside a SYNCHRONIZED block.
+ */
+#define UNSYNCHRONIZED(name) \
+  for (decltype(SYNCHRONIZED_lockedPtr.typeHackDoNotUse()) \
+         SYNCHRONIZED_state3(&SYNCHRONIZED_lockedPtr); \
+       !SYNCHRONIZED_state; SYNCHRONIZED_state = true) \
+    for (auto name = *SYNCHRONIZED_state3.operator->(); \
+         !SYNCHRONIZED_state; SYNCHRONIZED_state = true)
+
+/**
+ * Locks two objects in increasing order of their addresses.
+ */
+template <class P1, class P2>
+void lockInOrder(P1& p1, P2& p2) {
+  if (static_cast<const void*>(p1.operator->()) >
+      static_cast<const void*>(p2.operator->())) {
+    p2.acquire();
+    p1.acquire();
+  } else {
+    p1.acquire();
+    p2.acquire();
+  }
+}
+
+/**
+ * Synchronizes two Synchronized objects (they may encapsulate
+ * different data). Synchronization is done in increasing order of
+ * object addresses, so there is no deadlock risk.
+ */
+#define SYNCHRONIZED_DUAL(n1, e1, n2, e2) \
+  if (bool SYNCHRONIZED_state = false) {} else \
+    for (auto SYNCHRONIZED_lp1 = (e1).internalDoNotUse(); \
+         !SYNCHRONIZED_state; SYNCHRONIZED_state = true) \
+      for (auto& n1 = *SYNCHRONIZED_lp1.operator->(); \
+           !SYNCHRONIZED_state; SYNCHRONIZED_state = true) \
+        for (auto SYNCHRONIZED_lp2 = (e2).internalDoNotUse(); \
+             !SYNCHRONIZED_state; SYNCHRONIZED_state = true) \
+          for (auto& n2 = *SYNCHRONIZED_lp2.operator->(); \
+               !SYNCHRONIZED_state; SYNCHRONIZED_state = true) \
+            if ((::folly::lockInOrder( \
+                   SYNCHRONIZED_lp1, SYNCHRONIZED_lp2), \
+                 false)) {} \
+            else
+
+} /* namespace folly */
+
+#endif // SYNCHRONIZED_H_
diff --git a/folly/ThreadCachedArena.cpp b/folly/ThreadCachedArena.cpp
new file mode 100644
index 00000000..e81cfa77
--- /dev/null
+++ b/folly/ThreadCachedArena.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/ThreadCachedArena.h"
+
+namespace folly {
+
+ThreadCachedArena::ThreadCachedArena(size_t minBlockSize)
+  : minBlockSize_(minBlockSize) {
+}
+
+SysArena* ThreadCachedArena::allocateThreadLocalArena() {
+  SysArena* arena = new SysArena(minBlockSize_);
+  auto disposer = [this] (SysArena* t, TLPDestructionMode mode) {
+    std::unique_ptr<SysArena> tp(t);  // ensure it gets deleted
+    if (mode == TLPDestructionMode::THIS_THREAD) {
+      zombify(std::move(*t));
+    }
+  };
+  arena_.reset(arena, disposer);
+  return arena;
+}
+
+void ThreadCachedArena::zombify(SysArena&& arena) {
+  std::lock_guard<std::mutex> lock(zombiesMutex_);
+  zombies_.merge(std::move(arena));
+}
+
+}  // namespace folly
+
diff --git a/folly/ThreadCachedArena.h b/folly/ThreadCachedArena.h
new file mode 100644
index 00000000..2b87c121
--- /dev/null
+++ b/folly/ThreadCachedArena.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_THREADCACHEDARENA_H_
+#define FOLLY_THREADCACHEDARENA_H_
+
+#include <cstddef>
+#include <limits>
+#include <mutex>
+#include <utility>
+
+#include "folly/Likely.h"
+#include "folly/Arena.h"
+#include "folly/ThreadLocal.h"
+
+namespace folly {
+
+/**
+ * Thread-caching arena: allocate memory which gets freed when the arena gets
+ * destroyed.
+ *
+ * The arena itself allocates memory using malloc() in blocks of
+ * at least minBlockSize bytes.
+ *
+ * For speed, each thread gets its own Arena (see Arena.h); when a thread
+ * exits, its Arena gets merged into a "zombie" Arena, which will be
+ * deallocated when the ThreadCachedArena object is destroyed.
+ */
+class ThreadCachedArena {
+ public:
+  explicit ThreadCachedArena(
+      size_t minBlockSize = SysArena::kDefaultMinBlockSize);
+
+  void* allocate(size_t size) {
+    SysArena* arena = arena_.get();
+    if (UNLIKELY(!arena)) {
+      arena = allocateThreadLocalArena();
+    }
+
+    return arena->allocate(size);
+  }
+
+  void deallocate(void* p) {
+    // Deallocate? Never!
+  }
+
+ private:
+  ThreadCachedArena(const ThreadCachedArena&) = delete;
+  ThreadCachedArena(ThreadCachedArena&&) = delete;
+  ThreadCachedArena& operator=(const ThreadCachedArena&) = delete;
+  ThreadCachedArena& operator=(ThreadCachedArena&&) = delete;
+
+  SysArena* allocateThreadLocalArena();
+
+  // Zombify the blocks in arena, saving them for deallocation until
+  // the ThreadCachedArena is destroyed.
+  void zombify(SysArena&& arena);
+
+  size_t minBlockSize_;
+  SysArena zombies_;  // allocated from threads that are now dead
+  std::mutex zombiesMutex_;
+  ThreadLocalPtr<SysArena> arena_;  // per-thread arena
+};
+
+}  // namespace folly
+
+#endif /* FOLLY_THREADCACHEDARENA_H_ */
+
diff --git a/folly/ThreadCachedInt.h b/folly/ThreadCachedInt.h
new file mode 100644
index 00000000..a255f199
--- /dev/null
+++ b/folly/ThreadCachedInt.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Higher performance (up to 10x) atomic increment using thread caching.
+ *
+ * @author Spencer Ahrens (sahrens)
+ */
+
+#ifndef FOLLY_THREADCACHEDINT_H
+#define FOLLY_THREADCACHEDINT_H
+
+#include <atomic>
+#include "folly/Likely.h"
+#include "folly/ThreadLocal.h"
+
+namespace folly {
+
+
+// Note that readFull requires holding a lock and iterating through all of the
+// thread-local objects with the same Tag, so if you have a lot of
+// ThreadCachedInts you should consider breaking up the Tag space even
+// further.
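+//
+// Illustrative usage sketch (the names `hits`, `onHit`, and `reportHits`
+// below are hypothetical, not part of this header):
+//
+//   folly::ThreadCachedInt<int64_t> hits;
+//
+//   void onHit() {
+//     ++hits;                  // cheap: usually only touches the
+//   }                          // thread-local IntCache
+//
+//   int64_t reportHits() {
+//     return hits.readFull();  // slower but complete: locks and walks
+//   }                          // every thread's cache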
+template <class IntT, class Tag = IntT>
+class ThreadCachedInt : boost::noncopyable {
+  struct IntCache;
+
+ public:
+  explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
+    : target_(initialVal), cacheSize_(cacheSize) {
+  }
+
+  void increment(IntT inc) {
+    auto cache = cache_.get();
+    if (UNLIKELY(cache == NULL || cache->parent_ == NULL)) {
+      cache = new IntCache(*this);
+      cache_.reset(cache);
+    }
+    cache->increment(inc);
+  }
+
+  // Quickly grabs the current value, which may not include some cached
+  // increments.
+  IntT readFast() const {
+    return target_.load(std::memory_order_relaxed);
+  }
+
+  // Reads the current value plus all the cached increments. Requires grabbing
+  // a lock, so this is significantly slower than readFast().
+  IntT readFull() const {
+    IntT ret = readFast();
+    for (const auto& cache : cache_.accessAllThreads()) {
+      if (!cache.reset_.load(std::memory_order_acquire)) {
+        ret += cache.val_.load(std::memory_order_relaxed);
+      }
+    }
+    return ret;
+  }
+
+  // Quickly reads and resets the current value (doesn't reset cached
+  // increments).
+  IntT readFastAndReset() {
+    return target_.exchange(0, std::memory_order_release);
+  }
+
+  // This function is designed for accumulating into another counter, where
+  // you only want to count each increment once. It can still leave the count
+  // slightly off, but it is much better than calling readFull() and set(0)
+  // sequentially.
+  IntT readFullAndReset() {
+    IntT ret = readFastAndReset();
+    for (auto& cache : cache_.accessAllThreads()) {
+      if (!cache.reset_.load(std::memory_order_acquire)) {
+        ret += cache.val_.load(std::memory_order_relaxed);
+        cache.reset_.store(true, std::memory_order_release);
+      }
+    }
+    return ret;
+  }
+
+  void setCacheSize(uint32_t newSize) {
+    cacheSize_.store(newSize, std::memory_order_release);
+  }
+
+  uint32_t getCacheSize() const {
+    return cacheSize_.load();
+  }
+
+  ThreadCachedInt& operator+=(IntT inc) { increment(inc); return *this; }
+  ThreadCachedInt& operator-=(IntT inc) { increment(-inc); return *this; }
+  // pre-increment (we don't support post-increment)
+  ThreadCachedInt& operator++() { increment(1); return *this; }
+  ThreadCachedInt& operator--() { increment(-1); return *this; }
+
+  // Thread-safe set function.
+  // This is a best-effort implementation. In some edge cases, there could be
+  // data loss (missing counts).
+  void set(IntT newVal) {
+    for (auto& cache : cache_.accessAllThreads()) {
+      cache.reset_.store(true, std::memory_order_release);
+    }
+    target_.store(newVal, std::memory_order_release);
+  }
+
+  // This is a little tricky - it's possible that our IntCaches are still
+  // alive in another thread and will get destroyed after this destructor
+  // runs, so we need to make sure we signal that this parent is dead.
+  ~ThreadCachedInt() {
+    for (auto& cache : cache_.accessAllThreads()) {
+      cache.parent_ = NULL;
+    }
+  }
+
+ private:
+  std::atomic<IntT> target_;
+  std::atomic<uint32_t> cacheSize_;
+  ThreadLocalPtr<IntCache, Tag> cache_;  // Must be last for dtor ordering
+
+  // This should only ever be modified by one thread.
+  struct IntCache {
+    ThreadCachedInt* parent_;
+    mutable std::atomic<IntT> val_;
+    mutable uint32_t numUpdates_;
+    std::atomic<bool> reset_;
+
+    explicit IntCache(ThreadCachedInt& parent)
+      : parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}
+
+    void increment(IntT inc) {
+      if (LIKELY(!reset_.load(std::memory_order_acquire))) {
+        // This thread is the only writer to val_, so it's fine to do
+        // a relaxed load and do the addition non-atomically.
+        val_.store(
+          val_.load(std::memory_order_relaxed) + inc,
+          std::memory_order_release
+        );
+      } else {
+        val_.store(inc, std::memory_order_relaxed);
+        reset_.store(false, std::memory_order_release);
+      }
+      ++numUpdates_;
+      if (UNLIKELY(numUpdates_ >
+                   parent_->cacheSize_.load(std::memory_order_acquire))) {
+        flush();
+      }
+    }
+
+    void flush() const {
+      parent_->target_.fetch_add(val_, std::memory_order_release);
+      val_.store(0, std::memory_order_release);
+      numUpdates_ = 0;
+    }
+
+    ~IntCache() {
+      if (parent_) {
+        flush();
+      }
+    }
+  };
+};
+
+}  // namespace folly
+
+#endif  // FOLLY_THREADCACHEDINT_H
diff --git a/folly/ThreadLocal.h b/folly/ThreadLocal.h
new file mode 100644
index 00000000..a449ce67
--- /dev/null
+++ b/folly/ThreadLocal.h
@@ -0,0 +1,346 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Improved thread local storage for non-trivial types (speed similar to
+ * pthread_getspecific, but only consumes a single pthread_key_t, and is 4x
+ * faster than boost::thread_specific_ptr).
+ *
+ * Also includes an accessor interface to walk all the thread local child
+ * objects of a parent. accessAllThreads() initializes an accessor which
+ * holds a global lock *that blocks all creation and destruction of
+ * ThreadLocal objects with the same Tag* and can be used as an iterable
+ * container.
+ *
+ * Intended use is for frequent-write, infrequent-read data access patterns
+ * such as counters.
+ *
+ * There are two classes here - ThreadLocal and ThreadLocalPtr.
+ * ThreadLocalPtr has semantics similar to boost::thread_specific_ptr.
+ * ThreadLocal is a thin wrapper around ThreadLocalPtr that manages
+ * allocation automatically.
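+ *
+ * A minimal usage sketch (illustrative only; Widget, MyTag, and inspect are
+ * hypothetical names):
+ *
+ *   struct MyTag {};
+ *   folly::ThreadLocal<Widget, MyTag> w;
+ *   w->frob();    // operates on this thread's Widget,
+ *                 // default-constructing it on first access
+ *   for (auto& widget : w.accessAllThreads()) {  // every thread's Widget;
+ *     inspect(widget);                           // holds the global Tag lock
+ *   }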
+ * + * @author Spencer Ahrens (sahrens) + */ + +#ifndef FOLLY_THREADLOCAL_H_ +#define FOLLY_THREADLOCAL_H_ + +#include "folly/Portability.h" +#include +#include "folly/Likely.h" +#include + +// Use noexcept on gcc 4.6 or higher +#undef FOLLY_NOEXCEPT +#ifdef __GNUC__ +# ifdef HAVE_FEATURES_H +# include +# if __GNUC_PREREQ(4,6) +# define FOLLY_NOEXCEPT noexcept +# define FOLLY_ASSERT(x) x +# endif +# endif +#endif + +#ifndef FOLLY_NOEXCEPT +# define FOLLY_NOEXCEPT +# define FOLLY_ASSERT(x) /**/ +#endif + +namespace folly { +enum class TLPDestructionMode { + THIS_THREAD, + ALL_THREADS +}; +} // namespace + +#include "folly/detail/ThreadLocalDetail.h" + +namespace folly { + +template class ThreadLocalPtr; + +template +class ThreadLocal { + public: + ThreadLocal() { } + + T* get() const { + T* ptr = tlp_.get(); + if (UNLIKELY(ptr == NULL)) { + ptr = new T(); + tlp_.reset(ptr); + } + return ptr; + } + + T* operator->() const { + return get(); + } + + T& operator*() const { + return *get(); + } + + void reset(T* newPtr = NULL) { + tlp_.reset(newPtr); + } + + typedef typename ThreadLocalPtr::Accessor Accessor; + Accessor accessAllThreads() const { + return tlp_.accessAllThreads(); + } + + // movable + ThreadLocal(ThreadLocal&&) = default; + ThreadLocal& operator=(ThreadLocal&&) = default; + + private: + // non-copyable + ThreadLocal(const ThreadLocal&) = delete; + ThreadLocal& operator=(const ThreadLocal&) = delete; + + mutable ThreadLocalPtr tlp_; +}; + +/* + * The idea here is that __thread is faster than pthread_getspecific, so we + * keep a __thread array of pointers to objects (ThreadEntry::elements) where + * each array has an index for each unique instance of the ThreadLocalPtr + * object. Each ThreadLocalPtr object has a unique id that is an index into + * these arrays so we can fetch the correct object from thread local storage + * very efficiently. + * + * In order to prevent unbounded growth of the id space and thus huge + * ThreadEntry::elements, arrays, for example due to continuous creation and + * destruction of ThreadLocalPtr objects, we keep a set of all active + * instances. When an instance is destroyed we remove it from the active + * set and insert the id into freeIds_ for reuse. These operations require a + * global mutex, but only happen at construction and destruction time. + * + * We use a single global pthread_key_t per Tag to manage object destruction and + * memory cleanup upon thread exit because there is a finite number of + * pthread_key_t's available per machine. 
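+ *
+ * The fast path implied by the above is roughly one array index per get():
+ *
+ *   threadEntry_.elements[id_].ptr   // no pthread_getspecific call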
+ */ + +template +class ThreadLocalPtr { + public: + ThreadLocalPtr() : id_(threadlocal_detail::StaticMeta::create()) { } + + ThreadLocalPtr(ThreadLocalPtr&& other) : id_(other.id_) { + other.id_ = 0; + } + + ThreadLocalPtr& operator=(ThreadLocalPtr&& other) { + assert(this != &other); + destroy(); + id_ = other.id_; + other.id_ = 0; + return *this; + } + + ~ThreadLocalPtr() { + destroy(); + } + + T* get() const { + return static_cast(threadlocal_detail::StaticMeta::get(id_).ptr); + } + + T* operator->() const { + return get(); + } + + T& operator*() const { + return *get(); + } + + void reset(T* newPtr) { + threadlocal_detail::ElementWrapper& w = + threadlocal_detail::StaticMeta::get(id_); + if (w.ptr != newPtr) { + w.dispose(TLPDestructionMode::THIS_THREAD); + w.set(newPtr); + } + } + + /** + * reset() with a custom deleter: + * deleter(T* ptr, TLPDestructionMode mode) + * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus + * deleting pointers for all threads), and THIS_THREAD if we're only deleting + * the member for one thread (because of thread exit or reset()) + */ + template + void reset(T* newPtr, Deleter deleter) { + threadlocal_detail::ElementWrapper& w = + threadlocal_detail::StaticMeta::get(id_); + if (w.ptr != newPtr) { + w.dispose(TLPDestructionMode::THIS_THREAD); + w.set(newPtr, deleter); + } + } + + // Holds a global lock for iteration through all thread local child objects. + // Can be used as an iterable container. + // Use accessAllThreads() to obtain one. + class Accessor { + friend class ThreadLocalPtr; + + threadlocal_detail::StaticMeta& meta_; + boost::mutex* lock_; + int id_; + + public: + class Iterator; + friend class Iterator; + + // The iterators obtained from Accessor are bidirectional iterators. + class Iterator : public boost::iterator_facade< + Iterator, // Derived + T, // value_type + boost::bidirectional_traversal_tag> { // traversal + friend class Accessor; + friend class boost::iterator_core_access; + const Accessor* const accessor_; + threadlocal_detail::ThreadEntry* e_; + + void increment() { + e_ = e_->next; + incrementToValid(); + } + + void decrement() { + e_ = e_->prev; + decrementToValid(); + } + + T& dereference() const { + return *static_cast(e_->elements[accessor_->id_].ptr); + } + + bool equal(const Iterator& other) const { + return (accessor_->id_ == other.accessor_->id_ && + e_ == other.e_); + } + + explicit Iterator(const Accessor* accessor) + : accessor_(accessor), + e_(&accessor_->meta_.head_) { + } + + bool valid() const { + return (e_->elements && + accessor_->id_ < e_->elementsCapacity && + e_->elements[accessor_->id_].ptr); + } + + void incrementToValid() { + for (; e_ != &accessor_->meta_.head_ && !valid(); e_ = e_->next) { } + } + + void decrementToValid() { + for (; e_ != &accessor_->meta_.head_ && !valid(); e_ = e_->prev) { } + } + }; + + ~Accessor() { + release(); + } + + Iterator begin() const { + return ++Iterator(this); + } + + Iterator end() const { + return Iterator(this); + } + + Accessor(const Accessor&) = delete; + Accessor& operator=(const Accessor&) = delete; + + Accessor(Accessor&& other) FOLLY_NOEXCEPT + : meta_(other.meta_), + lock_(other.lock_), + id_(other.id_) { + other.id_ = 0; + other.lock_ = NULL; + } + + Accessor& operator=(Accessor&& other) FOLLY_NOEXCEPT { + // Each Tag has its own unique meta, and accessors with different Tags + // have different types. So either *this is empty, or this and other + // have the same tag. 
But if they have the same tag, they have the same + // meta (and lock), so they'd both hold the lock at the same time, + // which is impossible, which leaves only one possible scenario -- + // *this is empty. Assert it. + assert(&meta_ == &other.meta_); + assert(lock_ == NULL); + using std::swap; + swap(lock_, other.lock_); + swap(id_, other.id_); + } + + Accessor() + : meta_(threadlocal_detail::StaticMeta::instance()), + lock_(NULL), + id_(0) { + } + + private: + explicit Accessor(int id) + : meta_(threadlocal_detail::StaticMeta::instance()), + lock_(&meta_.lock_) { + lock_->lock(); + id_ = id; + } + + void release() { + if (lock_) { + lock_->unlock(); + id_ = 0; + lock_ = NULL; + } + } + }; + + // accessor allows a client to iterate through all thread local child + // elements of this ThreadLocal instance. Holds a global lock for each + Accessor accessAllThreads() const { + FOLLY_ASSERT(static_assert(!std::is_same::value, + "Must use a unique Tag to use the accessAllThreads feature")); + return Accessor(id_); + } + + private: + void destroy() { + if (id_) { + threadlocal_detail::StaticMeta::destroy(id_); + } + } + + // non-copyable + ThreadLocalPtr(const ThreadLocalPtr&) = delete; + ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete; + + int id_; // every instantiation has a unique id +}; + +#undef FOLLY_NOEXCEPT + +} // namespace folly + +#endif /* FOLLY_THREADLOCAL_H_ */ diff --git a/folly/TimeoutQueue.cpp b/folly/TimeoutQueue.cpp new file mode 100644 index 00000000..f9eea10f --- /dev/null +++ b/folly/TimeoutQueue.cpp @@ -0,0 +1,76 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/TimeoutQueue.h" +#include + +namespace folly { + +TimeoutQueue::Id TimeoutQueue::add( + int64_t now, + int64_t delay, + Callback callback) { + Id id = nextId_++; + timeouts_.insert({id, now + delay, -1, std::move(callback)}); + return id; +} + +TimeoutQueue::Id TimeoutQueue::addRepeating( + int64_t now, + int64_t interval, + Callback callback) { + Id id = nextId_++; + timeouts_.insert({id, now + interval, interval, std::move(callback)}); + return id; +} + +int64_t TimeoutQueue::nextExpiration() const { + return (timeouts_.empty() ? 
std::numeric_limits::max() : + timeouts_.get().begin()->expiration); +} + +bool TimeoutQueue::erase(Id id) { + return timeouts_.get().erase(id); +} + +int64_t TimeoutQueue::runInternal(int64_t now, bool onceOnly) { + auto& byExpiration = timeouts_.get(); + int64_t nextExp; + do { + auto end = byExpiration.upper_bound(now); + std::vector expired; + std::move(byExpiration.begin(), end, std::back_inserter(expired)); + byExpiration.erase(byExpiration.begin(), end); + for (auto& event : expired) { + // Reinsert if repeating, do this before executing callbacks + // so the callbacks have a chance to call erase + if (event.repeatInterval >= 0) { + timeouts_.insert({event.id, now + event.repeatInterval, + event.repeatInterval, event.callback}); + } + } + + // Call callbacks + for (auto& event : expired) { + event.callback(event.id, now); + } + nextExp = nextExpiration(); + } while (!onceOnly && nextExp <= now); + return nextExp; +} + +} // namespace folly + diff --git a/folly/TimeoutQueue.h b/folly/TimeoutQueue.h new file mode 100644 index 00000000..d3019254 --- /dev/null +++ b/folly/TimeoutQueue.h @@ -0,0 +1,132 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Simple timeout queue. Call user-specified callbacks when their timeouts + * expire. + * + * This class assumes that "time" is an int64_t and doesn't care about time + * units (seconds, milliseconds, etc). You call runOnce() / runLoop() using + * the same time units that you use to specify callbacks. + * + * @author Tudor Bosman (tudorb@fb.com) + */ + +#ifndef FOLLY_TIMEOUTQUEUE_H_ +#define FOLLY_TIMEOUTQUEUE_H_ + +#include +#include +#include +#include +#include +#include + +namespace folly { + +class TimeoutQueue { + public: + typedef int64_t Id; + typedef std::function Callback; + + TimeoutQueue() : nextId_(1) { } + + /** + * Add a one-time timeout event that will fire "delay" time units from "now" + * (that is, the first time that run*() is called with a time value >= now + * + delay). + */ + Id add(int64_t now, int64_t delay, Callback callback); + + /** + * Add a repeating timeout event that will fire every "interval" time units + * (it will first fire when run*() is called with a time value >= + * now + interval). + * + * run*() will always invoke each repeating event at most once, even if + * more than one "interval" period has passed. + */ + Id addRepeating(int64_t now, int64_t interval, Callback callback); + + /** + * Erase a given timeout event, returns true if the event was actually + * erased and false if it didn't exist in our queue. + */ + bool erase(Id id); + + /** + * Process all events that are due at times <= "now" by calling their + * callbacks. + * + * Callbacks are allowed to call back into the queue and add / erase events; + * they might create more events that are already due. 
In this case, + * runOnce() will only go through the queue once, and return a "next + * expiration" time in the past or present (<= now); runLoop() + * will process the queue again, until there are no events already due. + * + * Note that it is then possible for runLoop to never return if + * callbacks re-add themselves to the queue (or if you have repeating + * callbacks with an interval of 0). + * + * Return the time that the next event will be due (same as + * nextExpiration(), below) + */ + int64_t runOnce(int64_t now) { return runInternal(now, true); } + int64_t runLoop(int64_t now) { return runInternal(now, false); } + + /** + * Return the time that the next event will be due. + */ + int64_t nextExpiration() const; + + private: + int64_t runInternal(int64_t now, bool runOnce); + // noncopyable + TimeoutQueue(const TimeoutQueue&) = delete; + TimeoutQueue& operator=(const TimeoutQueue&) = delete; + + struct Event { + Id id; + int64_t expiration; + int64_t repeatInterval; + Callback callback; + }; + + typedef boost::multi_index_container< + Event, + boost::multi_index::indexed_by< + boost::multi_index::ordered_unique>, + boost::multi_index::ordered_non_unique> + > + > Set; + + enum { + BY_ID=0, + BY_EXPIRATION=1 + }; + + Set timeouts_; + Id nextId_; +}; + +} // namespace folly + +#endif /* FOLLY_TIMEOUTQUEUE_H_ */ + diff --git a/folly/Traits.h b/folly/Traits.h new file mode 100644 index 00000000..566036bb --- /dev/null +++ b/folly/Traits.h @@ -0,0 +1,236 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// @author: Andrei Alexandrescu + +#ifndef FOLLY_BASE_TRAITS_H_ +#define FOLLY_BASE_TRAITS_H_ + +#include +#include +#include +#include +#include + +namespace folly { + +/** + * IsRelocatable::value describes the ability of moving around + * memory a value of type T by using memcpy (as opposed to the + * conservative approach of calling the copy constructor and then + * destroying the old temporary. Essentially for a relocatable type, + * the following two sequences of code should be semantically + * equivalent: + * + * void move1(T * from, T * to) { + * new(to) T(from); + * (*from).~T(); + * } + * + * void move2(T * from, T * to) { + * memcpy(from, to, sizeof(T)); + * } + * + * Most C++ types are relocatable; the ones that aren't would include + * internal pointers or (very rarely) would need to update remote + * pointers to pointers tracking them. All C++ primitive types and + * type constructors are relocatable. + * + * This property can be used in a variety of optimizations. Currently + * fbvector uses this property intensively. + * + * The default conservatively assumes the type is not + * relocatable. Several specializations are defined for known + * types. You may want to add your own specializations. Do so in + * namespace folly and make sure you keep the specialization of + * IsRelocatable in the same header as SomeStruct. 
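+ *
+ * For example, a specialization for a hypothetical SomeStruct (using the
+ * FOLLY_ASSUME_RELOCATABLE macro defined below) would look like:
+ *
+ *   // in the header that defines SomeStruct, at namespace folly scope
+ *   template <>
+ *   FOLLY_ASSUME_RELOCATABLE(SomeStruct)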
+ */ +template struct IsRelocatable : boost::mpl::not_ > +{}; + +} // namespace folly + +/** + * Use this macro ONLY inside namespace folly. When using it with a + * regular type, use it like this: + * + * // Make sure you're at namespace ::folly scope + * template<> FOLLY_ASSUME_RELOCATABLE(MyType) + * + * When using it with a template type, use it like this: + * + * // Make sure you're at namespace ::folly scope + * template + * FOLLY_ASSUME_RELOCATABLE(MyType) + */ +#define FOLLY_ASSUME_RELOCATABLE(...) \ + struct IsRelocatable< __VA_ARGS__ > : ::boost::true_type {}; + +/** + * Use this macro ONLY inside namespace boost. When using it with a + * regular type, use it like this: + * + * // Make sure you're at namespace ::boost scope + * template<> FOLLY_ASSUME_HAS_NOTHROW_CONSTRUCTOR(MyType) + * + * When using it with a template type, use it like this: + * + * // Make sure you're at namespace ::boost scope + * template + * FOLLY_ASSUME_HAS_NOTHROW_CONSTRUCTOR(MyType) + */ +#define FOLLY_ASSUME_HAS_NOTHROW_CONSTRUCTOR(...) \ + struct has_nothrow_constructor< __VA_ARGS__ > : ::boost::true_type {}; + +/** + * The FOLLY_ASSUME_FBVECTOR_COMPATIBLE* macros below encode two + * assumptions: first, that the type is relocatable per IsRelocatable + * above, and that it has a nothrow constructor. Most types can be + * assumed to satisfy both conditions, but it is the responsibility of + * the user to state that assumption. User-defined classes will not + * work with fbvector (see FBVector.h) unless they state this + * combination of properties. + * + * Use FOLLY_ASSUME_FBVECTOR_COMPATIBLE with regular types like this: + * + * FOLLY_ASSUME_FBVECTOR_COMPATIBLE(MyType) + * + * The versions FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1, _2, _3, and _4 + * allow using the macro for describing templatized classes with 1, 2, + * 3, and 4 template parameters respectively. For template classes + * just use the macro with the appropriate number and pass the name of + * the template to it. Example: + * + * template class MyType { ... }; + * ... + * // Make sure you're at global scope + * FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2(MyType) + */ + +// Use this macro ONLY at global level (no namespace) +#define FOLLY_ASSUME_FBVECTOR_COMPATIBLE(...) \ + namespace folly { template<> FOLLY_ASSUME_RELOCATABLE(__VA_ARGS__) } \ + namespace boost { \ + template<> FOLLY_ASSUME_HAS_NOTHROW_CONSTRUCTOR(__VA_ARGS__) } +// Use this macro ONLY at global level (no namespace) +#define FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(...) \ + namespace folly { \ + template FOLLY_ASSUME_RELOCATABLE(__VA_ARGS__) } \ + namespace boost { \ + template FOLLY_ASSUME_HAS_NOTHROW_CONSTRUCTOR(__VA_ARGS__) } +// Use this macro ONLY at global level (no namespace) +#define FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2(...) \ + namespace folly { \ + template \ + FOLLY_ASSUME_RELOCATABLE(__VA_ARGS__) } \ + namespace boost { \ + template \ + FOLLY_ASSUME_HAS_NOTHROW_CONSTRUCTOR(__VA_ARGS__) } +// Use this macro ONLY at global level (no namespace) +#define FOLLY_ASSUME_FBVECTOR_COMPATIBLE_3(...) \ + namespace folly { \ + template \ + FOLLY_ASSUME_RELOCATABLE(__VA_ARGS__) } \ + namespace boost { \ + template \ + FOLLY_ASSUME_HAS_NOTHROW_CONSTRUCTOR(__VA_ARGS__) } +// Use this macro ONLY at global level (no namespace) +#define FOLLY_ASSUME_FBVECTOR_COMPATIBLE_4(...) 
\ + namespace folly { \ + template \ + FOLLY_ASSUME_RELOCATABLE(__VA_ARGS__) } \ + namespace boost { \ + template \ + FOLLY_ASSUME_HAS_NOTHROW_CONSTRUCTOR(__VA_ARGS__) } + +/** + * Instantiate FOLLY_ASSUME_FBVECTOR_COMPATIBLE for a few types. It is + * safe to assume that pair is compatible if both of its components + * are. Furthermore, all STL containers can be assumed to comply, + * although that is not guaranteed by the standard. + */ + +namespace std { + +template + class pair; +#ifndef _GLIBCXX_USE_FB +template + class basic_string; +#else +template + class basic_string; +#endif +template + class vector; +template + class deque; +template + class list; +template + class set; +template + class map; +template + class shared_ptr; + +} + +namespace boost { + +template class shared_ptr; + +template +struct has_nothrow_constructor< std::pair > + : ::boost::mpl::and_< has_nothrow_constructor, + has_nothrow_constructor > {}; + +} // namespace boost + +namespace folly { + +// STL commonly-used types +template +struct IsRelocatable< std::pair > + : ::boost::mpl::and_< IsRelocatable, IsRelocatable > {}; + +// Is T one of T1, T2, ..., Tn? +template +struct IsOneOf { + enum { value = false }; +}; + +template +struct IsOneOf { + enum { value = std::is_same::value || IsOneOf::value }; +}; + +} // namespace folly + +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_3(std::basic_string); +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2(std::vector); +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2(std::list); +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2(std::deque); +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_4(std::map); +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_3(std::set); +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_2(std::unique_ptr); +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(std::shared_ptr); +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(std::function); + +// Boost +FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(boost::shared_ptr); + +#endif //FOLLY_BASE_TRAITS_H_ diff --git a/folly/Unicode.cpp b/folly/Unicode.cpp new file mode 100644 index 00000000..7ac48931 --- /dev/null +++ b/folly/Unicode.cpp @@ -0,0 +1,54 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Unicode.h" + +namespace folly { + +////////////////////////////////////////////////////////////////////// + +fbstring codePointToUtf8(char32_t cp) { + fbstring result; + + // Based on description from http://en.wikipedia.org/wiki/UTF-8. 
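+  //
+  // Encoded byte layout by code point range:
+  //   U+0000  - U+007F    0xxxxxxx
+  //   U+0080  - U+07FF    110xxxxx 10xxxxxx
+  //   U+0800  - U+FFFF    1110xxxx 10xxxxxx 10xxxxxx
+  //   U+10000 - U+10FFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx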
+ + if (cp <= 0x7f) { + result.resize(1); + result[0] = static_cast(cp); + } else if (cp <= 0x7FF) { + result.resize(2); + result[1] = static_cast(0x80 | (0x3f & cp)); + result[0] = static_cast(0xC0 | (cp >> 6)); + } else if (cp <= 0xFFFF) { + result.resize(3); + result[2] = static_cast(0x80 | (0x3f & cp)); + result[1] = (0x80 | static_cast((0x3f & (cp >> 6)))); + result[0] = (0xE0 | static_cast(cp >> 12)); + } else if (cp <= 0x10FFFF) { + result.resize(4); + result[3] = static_cast(0x80 | (0x3f & cp)); + result[2] = static_cast(0x80 | (0x3f & (cp >> 6))); + result[1] = static_cast(0x80 | (0x3f & (cp >> 12))); + result[0] = static_cast(0xF0 | (cp >> 18)); + } + + return result; +} + +////////////////////////////////////////////////////////////////////// + +} + diff --git a/folly/Unicode.h b/folly/Unicode.h new file mode 100644 index 00000000..b0723a07 --- /dev/null +++ b/folly/Unicode.h @@ -0,0 +1,39 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Some utility routines relating to unicode. + +#ifndef FOLLY_UNICODE_H_ +#define FOLLY_UNICODE_H_ + +#include "folly/FBString.h" + +namespace folly { + +////////////////////////////////////////////////////////////////////// + +/* + * Encode a single unicode code point into a UTF-8 byte sequence. + * + * Return value is undefined if `cp' is an invalid code point. + */ +fbstring codePointToUtf8(char32_t cp); + +////////////////////////////////////////////////////////////////////// + +} + +#endif diff --git a/folly/build/generate_escape_tables.py b/folly/build/generate_escape_tables.py new file mode 100755 index 00000000..e84179e7 --- /dev/null +++ b/folly/build/generate_escape_tables.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# +# Generate Escape tables. 
+# Copyright 2011 Facebook +# +# @author Tudor Bosman (tudorb@fb.com) +# +import os +from optparse import OptionParser + +OUTPUT_FILE = "EscapeTables.cpp" + +def generate(f): + f.write("namespace folly {\n" + "namespace detail {\n" + "\n") + f.write("extern const char cEscapeTable[] =\n") + escapes = dict(( + ('"', '\\"'), + ('\\', '\\\\'), + ('?', '?'), + ('\n', 'n'), + ('\r', 'r'), + ('\t', 't'), + )) + for i in range(0, 256): + if i % 64 == 0: + if i != 0: + f.write("\"\n") + f.write(" \"") + c = chr(i) + if c in escapes: + c = escapes[c] + elif i < 32 or i > 126: + c = 'O' # octal + else: + c = 'P' # printable + f.write(c) + f.write("\";\n" + "\n") + + f.write("extern const char cUnescapeTable[] =\n") + for i in range(0, 256): + if i % 64 == 0: + if i != 0: + f.write("\"\n") + f.write(" \"") + c = chr(i) + if c in '\'?': + f.write(c) + elif c in '"\\abfnrtv': + f.write("\\" + c) + elif i >= ord('0') and i <= ord('7'): + f.write("O") # octal + elif c == "x": + f.write("X") # hex + else: + f.write("I") # invalid + f.write("\";\n" + "\n" + "extern const unsigned char hexTable[] = {") + + for i in range(0, 256): + if i % 16 == 0: + f.write("\n ") + if i >= ord('0') and i <= ord('9'): + f.write("{0:2d}, ".format(i - ord('0'))) + elif i >= ord('a') and i <= ord('f'): + f.write("{0:2d}, ".format(i - ord('a') + 10)) + elif i >= ord('A') and i <= ord('F'): + f.write("{0:2d}, ".format(i - ord('A') + 10)) + else: + f.write("16, ") + f.write("\n};\n" + "\n") + + f.write("} // namespace detail\n" + "} // namespace folly\n") + +def main(): + parser = OptionParser() + parser.add_option("--install_dir", dest="install_dir", default=".", + help="write output to DIR", metavar="DIR") + parser.add_option("--fbcode_dir") + (options, args) = parser.parse_args() + f = open(os.path.join(options.install_dir, OUTPUT_FILE), "w") + generate(f) + f.close() + +if __name__ == "__main__": + main() diff --git a/folly/build/generate_format_tables.py b/folly/build/generate_format_tables.py new file mode 100755 index 00000000..e464ed2a --- /dev/null +++ b/folly/build/generate_format_tables.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# +# Generate Format tables + + +import os +from optparse import OptionParser + +OUTPUT_FILE = "FormatTables.cpp" + +def generate_table(f, type_name, name, map): + f.write("extern const {0} {1}[] = {{".format(type_name, name)) + for i in range(0, 256): + if i % 2 == 0: + f.write("\n ") + f.write("{0}::{1}, ".format(type_name, map.get(chr(i), "INVALID"))) + f.write("\n};\n\n") + +def generate_conv_table(f, name, values): + values = list(values) + line = '' + for i, v in enumerate(values): + if i == 0: + f.write("extern const char {0}[{1}][{2}] = {{\n".format( + name, len(values), len(v))) + row = "{{{0}}}, ".format(", ".join("'{0}'".format(x) for x in v)) + if len(line) + len(row) > 79: + f.write(line + "\n") + line = '' + line += row + if line: + f.write(line + "\n") + f.write("};\n\n") + +def octal_values(): + return (tuple("{0:03o}".format(x)) for x in xrange(512)) + +def hex_values(upper): + fmt = "{0:02X}" if upper else "{0:02x}" + return (tuple(fmt.format(x)) for x in xrange(256)) + +def binary_values(): + return (tuple("{0:08b}".format(x)) for x in xrange(256)) + +def generate(f): + f.write("#include \"folly/FormatArg.h\"\n" + "\n" + "namespace folly {\n" + "namespace detail {\n" + "\n") + + generate_table( + f, "FormatArg::Align", "formatAlignTable", + {"<": "LEFT", ">": "RIGHT", "=": "PAD_AFTER_SIGN", "^": "CENTER"}) + + generate_table( + f, "FormatArg::Sign", "formatSignTable", + 
{"+": "PLUS_OR_MINUS", "-": "MINUS", " ": "SPACE_OR_MINUS"}) + + generate_conv_table(f, "formatOctal", octal_values()) + generate_conv_table(f, "formatHexLower", hex_values(False)) + generate_conv_table(f, "formatHexUpper", hex_values(True)) + generate_conv_table(f, "formatBinary", binary_values()) + + f.write("} // namespace detail\n" + "} // namespace folly\n") + +def main(): + parser = OptionParser() + parser.add_option("--install_dir", dest="install_dir", default=".", + help="write output to DIR", metavar="DIR") + parser.add_option("--fbcode_dir") + (options, args) = parser.parse_args() + f = open(os.path.join(options.install_dir, OUTPUT_FILE), "w") + generate(f) + f.close() + +if __name__ == "__main__": + main() diff --git a/folly/build/generate_varint_tables.py b/folly/build/generate_varint_tables.py new file mode 100755 index 00000000..f677f4d6 --- /dev/null +++ b/folly/build/generate_varint_tables.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# +# Generate tables for GroupVarint32 +# Copyright 2011 Facebook +# +# @author Tudor Bosman (tudorb@fb.com) +# +# Reference: http://www.stepanovpapers.com/CIKM_2011.pdf +# +# From 17 encoded bytes, we may use between 5 and 17 bytes to encode 4 +# integers. The first byte is a key that indicates how many bytes each of +# the 4 integers takes: +# +# bit 0..1: length-1 of first integer +# bit 2..3: length-1 of second integer +# bit 4..5: length-1 of third integer +# bit 6..7: length-1 of fourth integer +# +# The value of the first byte is used as the index in a table which returns +# a mask value for the SSSE3 PSHUFB instruction, which takes an XMM register +# (16 bytes) and shuffles bytes from it into a destination XMM register +# (optionally setting some of them to 0) +# +# For example, if the key has value 4, that means that the first integer +# uses 1 byte, the second uses 2 bytes, the third and fourth use 1 byte each, +# so we set the mask value so that +# +# r[0] = a[0] +# r[1] = 0 +# r[2] = 0 +# r[3] = 0 +# +# r[4] = a[1] +# r[5] = a[2] +# r[6] = 0 +# r[7] = 0 +# +# r[8] = a[3] +# r[9] = 0 +# r[10] = 0 +# r[11] = 0 +# +# r[12] = a[4] +# r[13] = 0 +# r[14] = 0 +# r[15] = 0 + +import os +from optparse import OptionParser + +OUTPUT_FILE = "GroupVarintTables.cpp" + +def generate(f): + f.write("#include \n" + "#include \n" + "\n" + "namespace folly {\n" + "namespace detail {\n" + "\n" + "extern const __m128i groupVarintSSEMasks[] = {\n") + + # Compute SSE masks + for i in range(0, 256): + offset = 0 + vals = [0, 0, 0, 0] + for j in range(0, 4): + d = 1 + ((i >> (2 * j)) & 3) + # the j'th integer uses d bytes, consume them + for k in range(0, d): + vals[j] |= offset << (8 * k) + offset += 1 + # set remaining bytes in result to 0 + # 0xff: set corresponding byte in result to 0 + for k in range(d, 4): + vals[j] |= 0xff << (8 * k) + f.write(" {{0x{1:08x}{0:08x}U, 0x{3:08x}{2:08x}U}},\n".format(*vals)) + + f.write("};\n" + "\n" + "extern const uint8_t groupVarintLengths[] = {\n") + + # Also compute total encoded lengths, including key byte + for i in range(0, 256): + offset = 1 # include key byte + for j in range(0, 4): + d = 1 + ((i >> (2 * j)) & 3) + offset += d + f.write(" {0},\n".format(offset)) + + f.write("};\n" + "\n" + "} // namespace detail\n" + "} // namespace folly\n") + +def main(): + parser = OptionParser() + parser.add_option("--install_dir", dest="install_dir", default=".", + help="write output to DIR", metavar="DIR") + parser.add_option("--fbcode_dir") + (options, args) = parser.parse_args() + f = 
open(os.path.join(options.install_dir, OUTPUT_FILE), "w") + generate(f) + f.close() + +if __name__ == "__main__": + main() diff --git a/folly/configure.ac b/folly/configure.ac new file mode 100644 index 00000000..9048e5f2 --- /dev/null +++ b/folly/configure.ac @@ -0,0 +1,92 @@ + +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ(2.59) +AC_INIT(folly, 0.1, folly@fb.com) +AC_CONFIG_SRCDIR([Likely.h]) +AC_CONFIG_HEADERS([config.h]) +AX_PREFIX_CONFIG_H([folly-config.h], [folly], [config.h]) +AC_CONFIG_AUX_DIR([build-aux]) + +AM_INIT_AUTOMAKE([foreign dist-bzip2]) + +AC_CONFIG_MACRO_DIR([m4]) + +AC_PROG_INSTALL +AM_PROG_LIBTOOL + +AC_LANG([C++]) + +# Checks for programs. +AC_PROG_CXX +AC_PROG_CC +AC_CXX_COMPILE_STDCXX_0X + +# Checks for libraries. +AC_CHECK_LIB([glog],[openlog],[],[AC_MSG_ERROR( + [Please install google-glog library])]) +AC_CHECK_LIB([gflags],[getenv],[],[AC_MSG_ERROR( + [Please install google-gflags library])]) + +# check for boost libs +AX_BOOST_BASE +AX_BOOST_THREAD +AX_BOOST_REGEX + +# Checks for header files. +AC_HEADER_STDC +AC_CHECK_HEADERS([fcntl.h inttypes.h limits.h stdint.h stdlib.h string.h sys/time.h unistd.h mutex.h features.h malloc.h]) + +AC_CHECK_HEADER(double-conversion.h, [], [AC_MSG_ERROR( + [Couldn't find double-conversion.h, please download from \ + http://code.google.com/p/double-conversion/])], []) +AC_CHECK_LIB([double_conversion_pic],[ceil],[],[AC_MSG_ERROR( + [Please install double-conversion library])]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_HEADER_STDBOOL +AC_C_CONST +AC_C_INLINE +AC_TYPE_SIZE_T +AC_HEADER_TIME +AC_C_VOLATILE +AC_CHECK_TYPES([ptrdiff_t]) + +# Checks for library functions. +AC_CHECK_FUNCS([getdelim \ + gettimeofday \ + memmove \ + memset \ + pow \ + strerror \ + pthread_yield \ + ffsll \ + rallocm \ + malloc_size \ + malloc_usable_size]) + +if test "$ac_cv_func_pthread_yield" = "no"; then + AC_CHECK_HEADERS([sched.h]) + AC_CHECK_FUNCS([sched_yield]) +fi + +CXX_FLAGS="" +if test "$ac_cv_cxx_compile_cxx0x_cxx" = yes; then + CXX_FLAGS="-std=c++0x" +fi +if test "$ac_cv_cxx_compile_cxx0x_gxx" = yes; then + CXX_FLAGS="-std=gnu++0x" +fi + +AC_SUBST(AM_CPPFLAGS, '-I../$(top_srcdir)'" "'-I$(top_srcdir)/io'" "'-I$(top_srcdir)/test'" -lstdc++ $CXX_FLAGS $BOOST_CPPFLAGS") +AC_SUBST(AM_CXXFLAGS, "$BOOST_THREAD_LIB") + +AM_CONDITIONAL([HAVE_STD_THREAD], [test "$ac_cv_header_features" = "yes"]) +AM_CONDITIONAL([HAVE_X86_64], [test "$build_cpu" = "x86_64"]) + +# Output +AC_CONFIG_FILES([Makefile + test/Makefile + test/function_benchmark/Makefile]) +AC_OUTPUT diff --git a/folly/detail/AtomicHashUtils.h b/folly/detail/AtomicHashUtils.h new file mode 100644 index 00000000..3ee20c16 --- /dev/null +++ b/folly/detail/AtomicHashUtils.h @@ -0,0 +1,37 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Some utilities used by AtomicHashArray and AtomicHashMap +// +// Note: no include guard; different -inl.h files include this and +// undef it more than once in a translation unit. + +#if !(defined(__x86__) || defined(__i386__) || defined(__x86_64__)) +#define FOLLY_SPIN_WAIT(condition) \ + for (int counter = 0; condition; ++counter) { \ + if (counter < 10000) continue; \ + pthread_yield(); \ + } +#else +#define FOLLY_SPIN_WAIT(condition) \ + for (int counter = 0; condition; ++counter) { \ + if (counter < 10000) { \ + asm volatile("pause"); \ + continue; \ + } \ + pthread_yield(); \ + } +#endif diff --git a/folly/detail/BitIteratorDetail.h b/folly/detail/BitIteratorDetail.h new file mode 100644 index 00000000..3cb24000 --- /dev/null +++ b/folly/detail/BitIteratorDetail.h @@ -0,0 +1,93 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_DETAIL_BITITERATORDETAIL_H_ +#define FOLLY_DETAIL_BITITERATORDETAIL_H_ + +#include +#include +#include + +namespace folly { + +template class BitIterator; + +namespace bititerator_detail { + +// Reference to a bit. +// Templatize on both parent reference and value types to capture +// const-ness correctly and to work with the case where Ref is a +// reference-like type (not T&), just like our BitReference here. +template +class BitReference { + public: + BitReference(Ref r, size_t bit) : ref_(r), bit_(bit) { } + + operator bool() const { + return ref_ & (one_ << bit_); + } + + BitReference& operator=(bool b) { + if (b) { + set(); + } else { + clear(); + } + return *this; + } + + void set() { + ref_ |= (one_ << bit_); + } + + void clear() { + ref_ &= ~(one_ << bit_); + } + + void flip() { + ref_ ^= (one_ << bit_); + } + + private: + // shortcut to avoid writing static_cast everywhere + const static Value one_ = 1; + + Ref ref_; + size_t bit_; +}; + +template +struct BitIteratorBase { + static_assert(std::is_integral::value, + "BitIterator may only be used with integral types"); + typedef boost::iterator_adaptor< + BitIterator, // Derived + BaseIter, // Base + bool, // Value + boost::use_default, // CategoryOrTraversal + bititerator_detail::BitReference< + typename std::iterator_traits::reference, + typename std::iterator_traits::value_type + >, // Reference + ssize_t> type; +}; + + +} // namespace bititerator_detail +} // namespace folly + +#endif /* FOLLY_DETAIL_BITITERATORDETAIL_H_ */ + diff --git a/folly/detail/DiscriminatedPtrDetail.h b/folly/detail/DiscriminatedPtrDetail.h new file mode 100644 index 00000000..5cfa9ae8 --- /dev/null +++ b/folly/detail/DiscriminatedPtrDetail.h @@ -0,0 +1,165 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_DETAIL_DISCRIMINATEDPTRDETAIL_H_ +#define FOLLY_DETAIL_DISCRIMINATEDPTRDETAIL_H_ + +#include + +namespace folly { +namespace dptr_detail { + +/** + * Given a target type and a list of types, return the 1-based index of the + * type in the list of types. Fail to compile if the target type doesn't + * appear in the list. + * + * GetIndex::value == 3 + * GetIndex::value -> fails to compile + */ +template struct GetTypeIndex; + +// When recursing, we never reach the 0- or 1- template argument base case +// unless the target type is not in the list. If the target type is in the +// list, we stop recursing when it is at the head of the remaining type +// list via the GetTypeIndex partial specialization. +template +struct GetTypeIndex { + static const size_t value = 1; +}; + +template +struct GetTypeIndex { + static const size_t value = 1 + GetTypeIndex::value; +}; + +// Generalize std::is_same for variable number of type arguments +template +struct IsSameType; + +template <> +struct IsSameType<> { + static const bool value = true; +}; + +template +struct IsSameType { + static const bool value = true; +}; + +template +struct IsSameType { + static const bool value = + std::is_same::value && IsSameType::value; +}; + +// Define type as the type of all T in (non-empty) Types..., asserting that +// all types in Types... are the same. +template +struct SameType; + +template +struct SameType { + typedef T type; + static_assert(IsSameType::value, + "Not all types in pack are the same"); +}; + +// Determine the result type of applying a visitor of type V on a pointer +// to type T. +template +struct VisitorResult1 { + typedef typename std::result_of::type type; +}; + +// Determine the result type of applying a visitor of type V on a const pointer +// to type T. +template +struct ConstVisitorResult1 { + typedef typename std::result_of::type type; +}; + +// Determine the result type of applying a visitor of type V on pointers of +// all types in Types..., asserting that the type is the same for all types +// in Types... +template +struct VisitorResult { + typedef typename SameType< + typename VisitorResult1::type...>::type type; +}; + +// Determine the result type of applying a visitor of type V on const pointers +// of all types in Types..., asserting that the type is the same for all types +// in Types... +template +struct ConstVisitorResult { + typedef typename SameType< + typename ConstVisitorResult1::type...>::type type; +}; + +template struct ApplyVisitor1; + +template +struct ApplyVisitor1 { + R operator()(size_t index, V&& visitor, void* ptr) const { + CHECK(false); // NOTREACHED + } +}; + +template +struct ApplyVisitor1 { + R operator()(size_t index, V&& visitor, void* ptr) const { + return (index == 1 ? 
visitor(static_cast(ptr)) : + ApplyVisitor1()( + index - 1, std::forward(visitor), ptr)); + } +}; + +template struct ApplyConstVisitor1; + +template +struct ApplyConstVisitor1 { + R operator()(size_t index, V&& visitor, void* ptr) const { + CHECK(false); // NOTREACHED + } +}; + +template +struct ApplyConstVisitor1 { + R operator()(size_t index, V&& visitor, void* ptr) const { + return (index == 1 ? visitor(static_cast(ptr)) : + ApplyConstVisitor1()( + index - 1, std::forward(visitor), ptr)); + } +}; + +template +struct ApplyVisitor + : ApplyVisitor1< + V, typename VisitorResult::type, Types...> { +}; + +template +struct ApplyConstVisitor + : ApplyConstVisitor1< + V, typename ConstVisitorResult::type, Types...> { +}; + +} // namespace dptr_detail +} // namespace folly + +#endif /* FOLLY_DETAIL_DISCRIMINATEDPTRDETAIL_H_ */ + diff --git a/folly/detail/GroupVarintDetail.h b/folly/detail/GroupVarintDetail.h new file mode 100644 index 00000000..3f90872f --- /dev/null +++ b/folly/detail/GroupVarintDetail.h @@ -0,0 +1,104 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_DETAIL_GROUPVARINTDETAIL_H_ +#define FOLLY_DETAIL_GROUPVARINTDETAIL_H_ + +#include + +namespace folly { + +template +class GroupVarint; + +namespace detail { + +template +struct GroupVarintTraits; + +template <> +struct GroupVarintTraits { + enum { + kGroupSize = 4, + kHeaderSize = 1, + }; +}; + +template <> +struct GroupVarintTraits { + enum { + kGroupSize = 5, + kHeaderSize = 2, + }; +}; + +template +class GroupVarintBase { + protected: + typedef GroupVarintTraits Traits; + enum { kHeaderSize = Traits::kHeaderSize }; + + public: + typedef T type; + + /** + * Number of integers encoded / decoded in one pass. + */ + enum { kGroupSize = Traits::kGroupSize }; + + /** + * Maximum encoded size. + */ + enum { kMaxSize = kHeaderSize + sizeof(type) * kGroupSize }; + + /** + * Maximum size for n values. + */ + static size_t maxSize(size_t n) { + // Full groups + size_t total = (n / kGroupSize) * kFullGroupSize; + // Incomplete last group, if any + n %= kGroupSize; + if (n) { + total += kHeaderSize + n * sizeof(type); + } + return total; + } + + /** + * Size of n values starting at p. + */ + static size_t totalSize(const T* p, size_t n) { + size_t size = 0; + for (; n >= kGroupSize; n -= kGroupSize, p += kGroupSize) { + size += Derived::size(p); + } + if (n) { + size += Derived::partialSize(p, n); + } + return size; + } + + private: + typedef GroupVarint Derived; + enum { kFullGroupSize = kHeaderSize + kGroupSize * sizeof(type) }; +}; + +} // namespace detail +} // namespace folly + +#endif /* FOLLY_DETAIL_GROUPVARINTDETAIL_H_ */ + diff --git a/folly/detail/ThreadLocalDetail.h b/folly/detail/ThreadLocalDetail.h new file mode 100644 index 00000000..1d8fc7aa --- /dev/null +++ b/folly/detail/ThreadLocalDetail.h @@ -0,0 +1,321 @@ +/* + * Copyright 2012 Facebook, Inc. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_DETAIL_THREADLOCALDETAIL_H_
+#define FOLLY_DETAIL_THREADLOCALDETAIL_H_
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <new>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <glog/logging.h>
+
+#include <boost/thread/mutex.hpp>
+
+#include "folly/Foreach.h"
+#include "folly/Malloc.h"
+
+namespace folly {
+namespace threadlocal_detail {
+
+/**
+ * Base class for deleters.
+ */
+class DeleterBase {
+ public:
+  virtual ~DeleterBase() { }
+  virtual void dispose(void* ptr, TLPDestructionMode mode) const = 0;
+};
+
+/**
+ * Simple deleter class that calls delete on the passed-in pointer.
+ */
+template <class Ptr>
+class SimpleDeleter : public DeleterBase {
+ public:
+  virtual void dispose(void* ptr, TLPDestructionMode mode) const {
+    delete static_cast<Ptr>(ptr);
+  }
+};
+
+/**
+ * Custom deleter that calls a given callable.
+ */
+template <class Ptr, class Deleter>
+class CustomDeleter : public DeleterBase {
+ public:
+  explicit CustomDeleter(Deleter d) : deleter_(d) { }
+  virtual void dispose(void* ptr, TLPDestructionMode mode) const {
+    deleter_(static_cast<Ptr>(ptr), mode);
+  }
+ private:
+  Deleter deleter_;
+};
+
+
+/**
+ * POD wrapper around an element (a void*) and an associated deleter.
+ * This must be POD, as we memset() it to 0 and memcpy() it around.
+ */
+struct ElementWrapper {
+  void dispose(TLPDestructionMode mode) {
+    if (ptr != NULL) {
+      DCHECK(deleter != NULL);
+      deleter->dispose(ptr, mode);
+      if (ownsDeleter) {
+        delete deleter;
+      }
+      ptr = NULL;
+      deleter = NULL;
+      ownsDeleter = false;
+    }
+  }
+
+  template <class Ptr>
+  void set(Ptr p) {
+    DCHECK(ptr == NULL);
+    DCHECK(deleter == NULL);
+
+    if (p) {
+      // We leak a single object here but that is ok. If we used an
+      // object directly, there is a chance that the destructor will be
+      // called on that static object before any of the ElementWrappers
+      // are disposed and that isn't so nice.
+      static auto d = new SimpleDeleter<Ptr>();
+      ptr = p;
+      deleter = d;
+      ownsDeleter = false;
+    }
+  }
+
+  template <class Ptr, class Deleter>
+  void set(Ptr p, Deleter d) {
+    DCHECK(ptr == NULL);
+    DCHECK(deleter == NULL);
+    if (p) {
+      ptr = p;
+      deleter = new CustomDeleter<Ptr, Deleter>(d);
+      ownsDeleter = true;
+    }
+  }
+
+  void* ptr;
+  DeleterBase* deleter;
+  bool ownsDeleter;
+};
+
+/**
+ * Per-thread entry. Each thread using a StaticMeta object has one.
+ * This is written from the owning thread only (under the lock), read
+ * from the owning thread (no lock necessary), and read from other threads
+ * (under the lock).
+ */
+struct ThreadEntry {
+  ElementWrapper* elements;
+  size_t elementsCapacity;
+  ThreadEntry* next;
+  ThreadEntry* prev;
+};
+
+// Held in a singleton to track our global instances.
+// We have one of these per "Tag", by default one for the whole system
+// (Tag=void).
+//
+// Creating and destroying ThreadLocalPtr objects, as well as thread exit
+// for threads that use ThreadLocalPtr objects collide on a lock inside
+// StaticMeta; you can specify multiple Tag types to break that lock.
+template <class Tag>
+struct StaticMeta {
+  static StaticMeta<Tag>& instance() {
+    // Leak it on exit, there's only one per process and we don't have to
+    // worry about synchronization with exiting threads.
+    static bool constructed = (inst = new StaticMeta<Tag>());
+    return *inst;
+  }
+
+  int nextId_;
+  std::vector<int> freeIds_;
+  boost::mutex lock_;
+  pthread_key_t pthreadKey_;
+  ThreadEntry head_;
+
+  void push_back(ThreadEntry* t) {
+    t->next = &head_;
+    t->prev = head_.prev;
+    head_.prev->next = t;
+    head_.prev = t;
+  }
+
+  void erase(ThreadEntry* t) {
+    t->next->prev = t->prev;
+    t->prev->next = t->next;
+    t->next = t->prev = t;
+  }
+
+  static __thread ThreadEntry threadEntry_;
+  static StaticMeta<Tag>* inst;
+
+  StaticMeta() : nextId_(1) {
+    head_.next = head_.prev = &head_;
+    int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
+    if (ret != 0) {
+      std::string msg;
+      switch (ret) {
+        case EAGAIN:
+          char buf[100];
+          snprintf(buf, sizeof(buf), "PTHREAD_KEYS_MAX (%d) is exceeded",
+                   PTHREAD_KEYS_MAX);
+          msg = buf;
+          break;
+        case ENOMEM:
+          msg = "Out-of-memory";
+          break;
+        default:
+          msg = "(unknown error)";
+      }
+      throw std::runtime_error("pthread_key_create failed: " + msg);
+    }
+  }
+  ~StaticMeta() {
+    LOG(FATAL) << "StaticMeta lives forever!";
+  }
+
+  static void onThreadExit(void* ptr) {
+    auto & meta = instance();
+    DCHECK_EQ(ptr, &meta);
+    // We wouldn't call pthread_setspecific unless we actually called get()
+    DCHECK_NE(threadEntry_.elementsCapacity, 0);
+    {
+      boost::lock_guard<boost::mutex> g(meta.lock_);
+      meta.erase(&threadEntry_);
+      // No need to hold the lock any longer; threadEntry_ is private to this
+      // thread now that it's been removed from meta.
+    }
+    FOR_EACH_RANGE(i, 0, threadEntry_.elementsCapacity) {
+      threadEntry_.elements[i].dispose(TLPDestructionMode::THIS_THREAD);
+    }
+    free(threadEntry_.elements);
+    threadEntry_.elements = NULL;
+    pthread_setspecific(meta.pthreadKey_, NULL);
+  }
+
+  static int create() {
+    int id;
+    auto & meta = instance();
+    boost::lock_guard<boost::mutex> g(meta.lock_);
+    if (!meta.freeIds_.empty()) {
+      id = meta.freeIds_.back();
+      meta.freeIds_.pop_back();
+    } else {
+      id = meta.nextId_++;
+    }
+    return id;
+  }
+
+  static void destroy(int id) {
+    try {
+      auto & meta = instance();
+      // Elements in other threads that use this id.
+      std::vector<ElementWrapper> elements;
+      {
+        boost::lock_guard<boost::mutex> g(meta.lock_);
+        for (ThreadEntry* e = meta.head_.next; e != &meta.head_; e = e->next) {
+          if (id < e->elementsCapacity && e->elements[id].ptr) {
+            elements.push_back(e->elements[id]);
+
+            // Writing another thread's ThreadEntry from here is fine;
+            // the only other potential reader is the owning thread --
+            // from onThreadExit (which grabs the lock, so is properly
+            // synchronized with us) or from get() -- but using get() on a
+            // ThreadLocalPtr object that's being destroyed is a bug, so
+            // undefined behavior is fair game.
+            e->elements[id].ptr = NULL;
+            e->elements[id].deleter = NULL;
+          }
+        }
+        meta.freeIds_.push_back(id);
+      }
+      // Delete elements outside the lock
+      FOR_EACH(it, elements) {
+        it->dispose(TLPDestructionMode::ALL_THREADS);
+      }
+    } catch (...) { // Just in case we get a lock error or something anyway...
+      LOG(WARNING) << "Destructor discarding an exception that was thrown.";
+    }
+  }
+
+  static ElementWrapper& get(int id) {
+    size_t prevSize = threadEntry_.elementsCapacity;
+    if (prevSize <= id) {
+      size_t newSize = static_cast<size_t>((id + 5) * 1.7);
+      auto & meta = instance();
+      ElementWrapper* ptr = NULL;
+      // Rely on jemalloc to zero the memory if possible -- maybe it knows
+      // it's already zeroed and saves us some work.
+      if (!usingJEMalloc() ||
+          prevSize * sizeof(ElementWrapper) < jemallocMinInPlaceExpandable ||
+          (rallocm(
+              static_cast<void**>(static_cast<void*>(&threadEntry_.elements)),
+              NULL, newSize * sizeof(ElementWrapper), 0,
+              ALLOCM_NO_MOVE | ALLOCM_ZERO) != ALLOCM_SUCCESS)) {
+        // Sigh, must realloc, but we can't call realloc here, as elements is
+        // still linked in meta, so another thread might access invalid memory
+        // after realloc succeeds. We'll copy by hand and update threadEntry_
+        // under the lock.
+        if ((ptr = static_cast<ElementWrapper*>(
+                malloc(sizeof(ElementWrapper) * newSize))) != NULL) {
+          memcpy(ptr, threadEntry_.elements,
+                 sizeof(ElementWrapper) * prevSize);
+          memset(ptr + prevSize, 0,
+                 (newSize - prevSize) * sizeof(ElementWrapper));
+        } else {
+          throw std::bad_alloc();
+        }
+      }
+
+      // Success, update the entry
+      {
+        boost::lock_guard<boost::mutex> g(meta.lock_);
+        if (prevSize == 0) {
+          meta.push_back(&threadEntry_);
+        }
+        if (ptr) {
+          using std::swap;
+          swap(ptr, threadEntry_.elements);
+        }
+        threadEntry_.elementsCapacity = newSize;
+      }
+
+      free(ptr);
+
+      if (prevSize == 0) {
+        pthread_setspecific(meta.pthreadKey_, &meta);
+      }
+    }
+    return threadEntry_.elements[id];
+  }
+};
+
+template <class Tag>
+__thread ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
+template <class Tag>
+StaticMeta<Tag>* StaticMeta<Tag>::inst = nullptr;
+
+} // namespace threadlocal_detail
+} // namespace folly
+
+#endif /* FOLLY_DETAIL_THREADLOCALDETAIL_H_ */
+
diff --git a/folly/docs/.gitignore b/folly/docs/.gitignore
new file mode 100644
index 00000000..2d19fc76
--- /dev/null
+++ b/folly/docs/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/folly/docs/AtomicHashMap.md b/folly/docs/AtomicHashMap.md
new file mode 100644
index 00000000..6c5f2546
--- /dev/null
+++ b/folly/docs/AtomicHashMap.md
@@ -0,0 +1,134 @@
+`folly/AtomicHashMap.h`
+----------------------
+
+`folly/AtomicHashMap.h` introduces a synchronized UnorderedAssociativeContainer
+implementation designed for extreme performance in heavily multithreaded
+environments (about 2-5x faster than tbb::concurrent_hash_map) and good memory
+usage properties. Find and iteration are wait-free, insert has key-level lock
+granularity, there is minimal memory overhead, and permanent 32-bit ids can be
+used to reference each element.
+
+
+### Limitations
+***
+
+Although it can provide extreme performance, AtomicHashMap has some unique
+limitations as well.
+
+* The space for erased elements cannot be reclaimed (they are tombstoned
+forever), so it's generally not a good idea to use this if you're erasing things
+a lot.
+
+* Only supports 32- or 64-bit keys - this is because they must be atomically
+compare-and-swap'ed.
+
+* Growth beyond initialization reduces performance - if you don't know
+the approximate number of elements you'll be inserting into the map, you
+probably shouldn't use this class.
+
+* Must manage synchronization externally in order to modify values in the map
+after insertion. Lock pools are a common way to do this, or you may
+consider using `folly::PackedSyncPtr` as your `ValueT`.
+
+* Must define special reserved key values for empty, erased, and locked
+elements.
+
+For a complete list of limitations and departures from the
+UnorderedAssociativeContainer concept, see `folly/AtomicHashMap.h`.
+
+
+### Unique Features
+***
+
+* `value_type` references remain valid for as long as the map itself does. Note
+this is not true for most other probing hash maps, which move elements when
+rehashing (something they must do in order to grow). AtomicHashMap grows by
+chaining additional slabs, so elements never need to be moved.
+
+* Unique 32-bit ids can be used to reference elements in the map via
+`iterator::getIndex()`. This can be helpful to save memory in the rest of the
+application by replacing 64-bit pointers or keys.
+
+* Iterators are never invalidated. This means you can iterate through the map
+while simultaneously inserting and erasing. This is particularly useful for
+non-blocking map serialization.
+
+
+### Usage
+***
+
+Usage is similar to most maps, although note the conspicuous lack of
+`operator[]`, which would encourage non-thread-safe access patterns.
+
+Below is a synchronized key counter implementation that allows the counter
+values to be incremented in parallel with serializing all the values to a
+string.
+
+```Cpp
+    class Counters {
+     private:
+      AtomicHashMap<int64_t, int64_t> ahm;
+
+     public:
+      explicit Counters(size_t numCounters) : ahm(numCounters) {}
+
+      void increment(int64_t obj_id) {
+        auto ret = ahm.insert(make_pair(obj_id, 1));
+        if (!ret.second) {
+          // obj_id already exists, increment
+          NoBarrier_AtomicIncrement(&ret.first->second, 1);
+        }
+      }
+
+      int64_t getValue(int64_t obj_id) {
+        auto ret = ahm.find(obj_id);
+        return ret != ahm.end() ? ret->second : 0;
+      }
+
+      // Serialize the counters without blocking increments
+      string toString() {
+        string ret = "{\n";
+        ret.reserve(ahm.size() * 32);
+        for (const auto& e : ahm) {
+          ret += folly::to<string>(
+            "  [", e.first, ":", NoBarrier_Load(&e.second), "]\n");
+        }
+        ret += "}\n";
+        return ret;
+      }
+    };
+```
+
+### Implementation
+***
+
+AtomicHashMap is a composition of AtomicHashArray submaps, which implement the
+meat of the functionality. Only one AHA is created on initialization, and
+additional submaps are appended if the first one gets full. If the AHM grows,
+there will be multiple submaps that must be probed in series to find a given
+key. The more growth, the more submaps will be chained, and the slower it will
+get. If the initial size estimate is good, only one submap will ever be created
+and performance will be optimal.
+
+AtomicHashArray is a fixed-size probing hash map (also referred to as an
+open-addressed hash map) where hash collisions are resolved by checking
+subsequent elements. This means that they can be allocated in slabs as arrays
+of value_type elements, have excellent cache performance, and have no memory
+overhead from storing pointers.
+
+The algorithm is simple - when inserting, the key is hash-mod'ed to an offset,
+and that element-key is atomically compare-and-swap'ed with the locked key
+value. If successful, the value is written and the element-key is unlocked by
+setting it to the input key value. If the compare fails, the next element is
+tried until success or the map is full.
+
+Finds are even simpler. The key is hash-mod'ed to an offset, and the
+element-key is examined. If it is the same as the input key, the reference is
+returned, if it's the empty key, failure is returned, otherwise the next key is
+tried. This can be done wait-free without any atomic instructions because the
+elements are always in a valid state.
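+
+To make that concrete, here is a minimal sketch of the insert and find loops
+described above (an illustration only, not folly's actual implementation;
+`Cell`, `kEmptyKey`, and `kLockedKey` are hypothetical names, the hash is
+omitted, and erase/full-map subtleties are glossed over):
+
+```Cpp
+    #include <atomic>
+    #include <cstddef>
+    #include <cstdint>
+
+    struct Cell {
+      std::atomic<int64_t> key;
+      int64_t value;
+    };
+
+    const int64_t kEmptyKey  = -1;  // reserved: slot never used
+    const int64_t kLockedKey = -2;  // reserved: slot currently being written
+
+    // Returns true if the key was inserted, false if already present or full.
+    bool insert(Cell* cells, size_t capacity, int64_t key, int64_t value) {
+      size_t i = static_cast<uint64_t>(key) % capacity;
+      for (size_t n = 0; n < capacity; ++n, i = (i + 1) % capacity) {
+        int64_t expected = kEmptyKey;
+        if (cells[i].key.compare_exchange_strong(expected, kLockedKey)) {
+          cells[i].value = value;  // write the value while the slot is locked
+          cells[i].key.store(key, std::memory_order_release);  // unlock: publish
+          return true;
+        }
+        while (expected == kLockedKey) {  // another insert is mid-publish
+          expected = cells[i].key.load(std::memory_order_acquire);
+        }
+        if (expected == key) {
+          return false;  // key already present
+        }
+        // Slot holds a different key: probe the next slot.
+      }
+      return false;  // map is full
+    }
+
+    // Wait-free find: no atomic read-modify-write needed, because a
+    // published key never changes afterwards.
+    const Cell* find(const Cell* cells, size_t capacity, int64_t key) {
+      size_t i = static_cast<uint64_t>(key) % capacity;
+      for (size_t n = 0; n < capacity; ++n, i = (i + 1) % capacity) {
+        int64_t stored = cells[i].key.load(std::memory_order_acquire);
+        if (stored == key)       return &cells[i];  // found it
+        if (stored == kEmptyKey) return nullptr;    // never-used slot: absent
+      }
+      return nullptr;
+    }
+```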
+
+Erase is done by finding the key, then compare-and-swap'ing the element-key with
+the reserved erased key value. If the swap succeeds, return success, otherwise
+return failure (the element was erased by a competing thread). If the key does
+not exist, return failure.
diff --git a/folly/docs/Benchmark.md b/folly/docs/Benchmark.md
new file mode 100644
index 00000000..635a261d
--- /dev/null
+++ b/folly/docs/Benchmark.md
@@ -0,0 +1,281 @@
+`folly/Benchmark.h`
+-----------------
+
+`folly/Benchmark.h` provides a simple framework for writing and
+executing benchmarks. Currently the framework targets only
+single-threaded testing (though you can internally use fork-join
+parallelism and measure total run time).
+
+To use this library, you need to be using gcc 4.6 or later. Include
+`folly/Benchmark.h` and make sure `folly/Benchmark.cpp` is part of the
+build (either directly or packaged with a library).
+
+### Overview
+***
+
+Using `folly/Benchmark.h` is very simple. Here's an example:
+
+``` Cpp
+    #include "folly/Benchmark.h"
+    #include "folly/Foreach.h"
+    #include <vector>
+    using namespace std;
+    using namespace folly;
+    BENCHMARK(insertFrontVector) {
+      // Let's insert 100 elements at the front of a vector
+      vector<int> v;
+      FOR_EACH_RANGE (i, 0, 100) {
+        v.insert(v.begin(), i);
+      }
+    }
+    BENCHMARK(insertBackVector) {
+      // Let's insert 100 elements at the back of a vector
+      vector<int> v;
+      FOR_EACH_RANGE (i, 0, 100) {
+        v.insert(v.end(), i);
+      }
+    }
+    int main() {
+      runBenchmarks();
+    }
+```
+
+Compiling and running this code prints the following to the standard
+output:
+
+```
+    ===============================================================================
+    test.cpp                                        relative  ns/iter  iters/s
+    ===============================================================================
+    insertFrontVector                                          3.84K   260.38K
+    insertBackVector                                           1.61K   622.75K
+    ===============================================================================
+```
+
+Let's worry about the empty column "relative" later. The table
+contains, for each benchmark, the time spent per call and the converse
+number of calls per second. Numbers are represented in metric notation
+(K for thousands, M for millions etc.). As expected, in this example
+the second function is much faster (fewer ns/iter and more iters/s).
+
+The macro `BENCHMARK` introduces a function and also adds it to an
+internal array containing all benchmarks in the system. The defined
+function takes no arguments and returns `void`.
+
+The framework calls the function many times to collect statistics
+about it. Sometimes the function itself would want to do that
+iteration---for example how about inserting `n` elements instead of
+100 elements? To do the iteration internally, use `BENCHMARK` with two
+parameters. The second parameter is the number of iterations and is
+passed by the framework down to the function. The type of the count is
+implicitly `unsigned`.
Consider a slightly reworked example:
+
+``` Cpp
+    #include "folly/Benchmark.h"
+    #include "folly/Foreach.h"
+    #include <vector>
+    using namespace std;
+    using namespace folly;
+    BENCHMARK(insertFrontVector, n) {
+      vector<int> v;
+      FOR_EACH_RANGE (i, 0, n) {
+        v.insert(v.begin(), i);
+      }
+    }
+    BENCHMARK(insertBackVector, n) {
+      vector<int> v;
+      FOR_EACH_RANGE (i, 0, n) {
+        v.insert(v.end(), i);
+      }
+    }
+    int main() {
+      runBenchmarks();
+    }
+```
+
+The produced numbers are substantially different:
+
+```
+    ===============================================================================
+    Benchmark                                       relative  ns/iter  iters/s
+    ===============================================================================
+    insertFrontVector                                          39.92    25.05M
+    insertBackVector                                            3.46   288.89M
+    ===============================================================================
+```
+
+Now the numbers indicate the speed of one single insertion because the
+framework assumed the user-defined function used internal iteration
+(which it does). So inserting at the back of a vector is more than 10
+times faster than inserting at the front! Speaking of comparisons...
+
+### Baselines
+***
+
+Choosing one or more good baselines is a crucial activity in any
+measurement. Without a baseline there is little information to derive
+from the sheer numbers. If, for example, you do experimentation with
+algorithms, a good baseline is often an established approach (e.g. the
+built-in `std::sort` for sorting). Essentially all experimental
+numbers should be compared against some baseline.
+
+To support baseline-driven measurements, `folly/Benchmark.h` defines
+`BENCHMARK_RELATIVE`, which works much like `BENCHMARK`, except it
+considers the most recent lexically-occurring `BENCHMARK` a baseline,
+and fills the "relative" column. Say, for example, we want to use
+front insertion for a vector as a baseline and see how back insertion
+compares with it:
+
+``` Cpp
+    #include "folly/Benchmark.h"
+    #include "folly/Foreach.h"
+    #include <vector>
+    using namespace std;
+    using namespace folly;
+    BENCHMARK(insertFrontVector, n) {
+      vector<int> v;
+      FOR_EACH_RANGE (i, 0, n) {
+        v.insert(v.begin(), i);
+      }
+    }
+    BENCHMARK_RELATIVE(insertBackVector, n) {
+      vector<int> v;
+      FOR_EACH_RANGE (i, 0, n) {
+        v.insert(v.end(), i);
+      }
+    }
+    int main() {
+      runBenchmarks();
+    }
+```
+
+This program prints something like:
+
+```
+    ===============================================================================
+    Benchmark                                       relative  ns/iter  iters/s
+    ===============================================================================
+    insertFrontVector                                          42.65    23.45M
+    insertBackVector                                 1208.24%   3.53   283.30M
+    ===============================================================================
+```
+
+showing the 1208.24% relative speed advantage of inserting at the back
+compared to inserting at the front. The scale is chosen in such a way
+that 100% means identical speed, numbers smaller than 100% indicate the
+benchmark is slower than the baseline, and numbers greater than 100%
+indicate the benchmark is faster. For example, if you see 42% that
+means the speed of the benchmark is 0.42 of the baseline speed. If you
+see 123%, it means the benchmark is 23% faster, i.e. it runs at 1.23
+times the baseline speed.
+
+To close the current benchmark group and start another, simply use
+`BENCHMARK` again.
+
+### Ars Gratia Artis
+***
+
+If you want to draw a horizontal line of dashes (e.g. at the end of a
+group or for whatever reason), use `BENCHMARK_DRAW_LINE()`. The line
+fulfills a purely aesthetic role; it doesn't interact with
+measurements in any way.
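+
+For instance, a sketch of how a line might separate two groups
+(hypothetical benchmark names; the line changes only the printed table,
+not the measurements):
+
+``` Cpp
+    BENCHMARK(insertBackWithReserve, n) {
+      vector<int> v;
+      v.reserve(n);
+      FOR_EACH_RANGE (i, 0, n) {
+        v.insert(v.end(), i);
+      }
+    }
+
+    BENCHMARK_DRAW_LINE()
+
+    BENCHMARK(insertBackNoReserve, n) {
+      vector<int> v;
+      FOR_EACH_RANGE (i, 0, n) {
+        v.insert(v.end(), i);
+      }
+    }
+```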
+
+### Suspending a benchmark
+***
+
+Sometimes benchmarking code must do some preparation work that is
+physically inside the benchmark function, but should not count toward
+its time budget. To temporarily suspend the benchmark, use the
+pseudo-statement `SUSPEND_BENCHMARK` as follows:
+
+``` Cpp
+    BENCHMARK(insertBackVector, n) {
+      vector<int> v;
+      SUSPEND_BENCHMARK {
+        v.reserve(n);
+      }
+      FOR_EACH_RANGE (i, 0, n) {
+        v.insert(v.end(), i);
+      }
+    }
+```
+
+The preallocation effected with `v.reserve(n)` will not count toward
+the total run time of the benchmark.
+
+Only the main thread should call `SUSPEND_BENCHMARK` (and of course it
+should not call it while other threads are doing actual work). This is
+because the timer is application-global.
+
+If the scope introduced by `SUSPEND_BENCHMARK` is not desired, you may
+want to "manually" use the `BenchmarkSuspender` type. Constructing
+such an object suspends time measurement, and destroying it resumes
+the measurement. If you want to resume time measurement before the
+destructor runs, call `dismiss` on the `BenchmarkSuspender`
+object. The previous example could have been written like this:
+
+``` Cpp
+    BENCHMARK(insertBackVector, n) {
+      BenchmarkSuspender braces;
+      vector<int> v;
+      v.reserve(n);
+      braces.dismiss();
+      FOR_EACH_RANGE (i, 0, n) {
+        v.insert(v.end(), i);
+      }
+    }
+```
+
+### `doNotOptimizeAway`
+***
+
+Finally, the small utility function `doNotOptimizeAway` prevents
+compiler optimizations that may interfere with benchmarking. Call
+`doNotOptimizeAway(var)` against variables that you use for
+benchmarking but that are otherwise useless. The compiler tends to do a
+good job at eliminating unused variables, and this function fools it
+into thinking a variable is in fact needed. Example:
+
+``` Cpp
+    BENCHMARK(fpOps, n) {
+      double d = 1;
+      FOR_EACH_RANGE (i, 1, n) {
+        d += i;
+        d -= i;
+        d *= i;
+        d /= i;
+      }
+      doNotOptimizeAway(d);
+    }
+```
+
+### A look under the hood
+***
+
+`folly/Benchmark.h` has a simple, systematic approach to collecting
+timings.
+
+First, it organizes measurements in several large epochs, and takes
+the minimum over all epochs. Taking the minimum gives the closest
+result to the real runtime. Benchmark timings are not a regular random
+variable that fluctuates around an average. Instead, the real time
+we're looking for is one to which there's a variety of additive noise
+(i.e. there is no noise that could actually shorten the benchmark time
+below its real value). In theory, given an infinite number of samples,
+the minimum of the samples is exactly the time being measured. That's
+why the accuracy of benchmarking increases with the number of epochs.
+
+Clearly, in real-world operation there will also be noise and a variety
+of effects caused by the running context. But the noise during the
+benchmark (straight setup, simple looping) is a poor model for the
+noise in the real application. So taking the minimum across several
+epochs is the most informative result.
+
+Inside each epoch, the function measured is iterated an increasing
+number of times until the total runtime is large enough to make noise
+negligible. At that point the time is collected, and the time per
+iteration is computed. As mentioned, the minimum time per iteration
+over all epochs is the final result.
+
+The timer function used is `clock_gettime` with the `CLOCK_REALTIME`
+clock id. Note that you must use a recent Linux kernel (2.6.38 or
+newer), otherwise the resolution of `CLOCK_REALTIME` is inadequate.
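+
+The epoch strategy above can be condensed into a short sketch. The
+following is illustrative only, not the framework's actual code; the
+`epochs` count and the noise threshold `minRunNs` are made-up
+parameters:
+
+``` Cpp
+    #include <time.h>
+    #include <algorithm>
+
+    // Measure the cost of one call to fn(), in nanoseconds, using the
+    // min-over-epochs strategy described above.
+    template <class F>
+    double nsPerIteration(F fn, int epochs = 100, double minRunNs = 1e6) {
+      double best = 1e300;                      // minimum over all epochs
+      for (int e = 0; e < epochs; ++e) {
+        for (unsigned n = 1; n != 0; n *= 2) {  // grow until noise is negligible
+          timespec t0, t1;
+          clock_gettime(CLOCK_REALTIME, &t0);
+          for (unsigned i = 0; i < n; ++i) {
+            fn();
+          }
+          clock_gettime(CLOCK_REALTIME, &t1);
+          double ns = (t1.tv_sec - t0.tv_sec) * 1e9 +
+                      (t1.tv_nsec - t0.tv_nsec);
+          if (ns >= minRunNs) {                 // long enough to trust
+            best = std::min(best, ns / n);      // keep per-iteration minimum
+            break;
+          }
+        }
+      }
+      return best;
+    }
+```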
diff --git a/folly/docs/Conv.md b/folly/docs/Conv.md
new file mode 100644
index 00000000..8d285e9d
--- /dev/null
+++ b/folly/docs/Conv.md
@@ -0,0 +1,217 @@
+`folly/Conv.h`
+-------------
+
+`folly/Conv.h` is a one-stop-shop for converting values across
+types. Its main features are simplicity of the API (only the
+names `to` and `toAppend` must be memorized), speed
+(folly is significantly faster, sometimes by an order of magnitude,
+than comparable APIs), and correctness.
+
+### Synopsis
+***
+
+All examples below are assumed to have included `folly/Conv.h`
+and issued `using namespace folly;`. You will need:
+
+``` Cpp
+    // To format as text and append to a string, use toAppend.
+    fbstring str;
+    toAppend(2.5, &str);
+    CHECK_EQ(str, "2.5");
+
+    // Multiple arguments are okay, too. Just put the pointer to string at the end.
+    toAppend(" is ", 2, " point ", 5, &str);
+    CHECK_EQ(str, "2.5 is 2 point 5");
+
+    // You don't need to use fbstring (although it's much faster for conversions and in general).
+    std::string stdStr;
+    toAppend("Pi is about ", 22.0 / 7, &stdStr);
+    // In general, just use to<TargetType>(sourceValue). It returns its result by value.
+    stdStr = to<std::string>("Variadic ", "arguments also accepted.");
+
+    // to<fbstring> is 2.5x faster than to<std::string> for typical workloads.
+    str = to<fbstring>("Variadic ", "arguments also accepted.");
+```
+
+### Integral-to-integral conversion
+***
+
+Using `to<TargetType>(value)` to convert one integral type to another
+will behave as follows:
+
+* If the target type can accommodate all possible values of the
+  source value, the value is implicitly converted. No further
+  action is taken. Example:
+
+``` Cpp
+    short x;
+    unsigned short y;
+    ...
+    auto a = to<int>(x); // zero overhead conversion
+    auto b = to<int>(y); // zero overhead conversion
+```
+
+* Otherwise, `to` inserts bounds checks and throws
+  `std::range_error` if the target type cannot accommodate the
+  source value. Example:
+
+``` Cpp
+    short x;
+    unsigned short y;
+    long z;
+    ...
+    x = 123;
+    auto a = to<unsigned short>(x); // fine
+    x = -1;
+    a = to<unsigned short>(x); // THROWS
+    z = 2000000000;
+    auto b = to<int>(z); // fine
+    z += 1000000000;
+    b = to<int>(z); // THROWS
+    auto c = to<unsigned int>(z); // fine
+```
+
+### Anything-to-string conversion
+***
+
+As mentioned, there are two primitives for converting anything to
+string: `to` and `toAppend`. They support the same set of source
+types, literally by definition (`to` is implemented in terms of
+`toAppend` for all types). The call `toAppend(value, &str)`
+formats and appends `value` to `str` whereas
+`to<StringType>(value)` formats `value` as a `StringType` and
+returns the result by value. Currently, the supported
+`StringType`s are `std::string` and `fbstring`.
+
+Both `toAppend` and `to` with a string type as a target support
+variadic arguments. Each argument is converted in turn. For
+`toAppend` the last argument in a variadic list must be the
+address of a supported string type (no need to specify the string
+type as a template argument).
+
+#### Integral-to-string conversion
+
+Nothing special here - integrals are converted to strings in
+decimal format, with a '-' prefix for negative values. Example:
+
+``` Cpp
+    auto a = to<fbstring>(123);
+    assert(a == "123");
+    a = to<fbstring>(-456);
+    assert(a == "-456");
+```
+
+The conversion implementation is aggressively optimized. It
+converts two digits at a time assisted by fixed-size tables.
+Converting a `long` to an `fbstring` is 3.6x faster than using
+`boost::lexical_cast` and 2.5x faster than using `sprintf` even
+though the latter is used in conjunction with a stack-allocated
+constant-size buffer.
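+
+To illustrate, here is a sketch of the two-digits-at-a-time technique
+(a simplified illustration, not folly's actual code; `digitPairs` is a
+hypothetical table name):
+
+``` Cpp
+    #include <cstdint>
+    #include <string>
+
+    // Convert an unsigned value to decimal, emitting two digits per step
+    // by indexing a 200-byte "00".."99" lookup table. This halves the
+    // number of divisions compared to the one-digit-at-a-time loop.
+    std::string toDecimal(uint64_t v) {
+      static const char digitPairs[201] =
+        "00010203040506070809101112131415161718192021222324"
+        "25262728293031323334353637383940414243444546474849"
+        "50515253545556575859606162636465666768697071727374"
+        "75767778798081828384858687888990919293949596979899";
+      char buf[20];                  // 2^64 has at most 20 decimal digits
+      char* p = buf + sizeof(buf);   // fill the buffer from the back
+      while (v >= 100) {
+        const char* d = digitPairs + (v % 100) * 2;
+        *--p = d[1];
+        *--p = d[0];
+        v /= 100;
+      }
+      if (v >= 10) {
+        const char* d = digitPairs + v * 2;
+        *--p = d[1];
+        *--p = d[0];
+      } else {
+        *--p = static_cast<char>('0' + v);
+      }
+      return std::string(p, buf + sizeof(buf) - p);
+    }
+```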
+
+Note that converting integral types to `fbstring` has a
+particular advantage compared to converting to `std::string`.
+No integral type (<= 64 bits) has more than 20 decimal digits
+including sign. Since `fbstring` employs the small string
+optimization for up to 23 characters, converting an integral
+to `fbstring` is guaranteed to not allocate memory, resulting
+in significant speed and memory locality gains. Benchmarks
+reveal a 2x gain on a typical workload.
+
+#### `char` to string conversion
+
+Although `char` is technically an integral type, most of the time
+you want the string representation of `'a'` to be `"a"`, not `97`.
+That's why `folly/Conv.h` handles `char` as a special case that
+does the expected thing. Note that `signed char` and `unsigned
+char` are still considered integral types.
+
+
+#### Floating point to string conversion
+
+`folly/Conv.h` uses [V8's double conversion](http://code.google.com/p/double-conversion/)
+routines. They are accurate and fast; on typical workloads,
+`to<fbstring>(doubleValue)` is 1.9x faster than `sprintf` and
+5.5x faster than `boost::lexical_cast`. (It is also 1.3x faster
+than `to<std::string>(doubleValue)`.)
+
+#### `const char*` to string conversion
+
+For completeness, `folly/Conv.h` supports `const char*`, including
+string literals. The "conversion" consists, of course, of
+the string itself. Example:
+
+``` Cpp
+    auto s = to<fbstring>("Hello, world");
+    assert(s == "Hello, world");
+```
+
+#### Anything from string conversion (i.e. parsing)
+***
+
+`folly/Conv.h` includes three kinds of parsing routines:
+
+* `to<Type>(const char* begin, const char* end)` rigidly
+  converts the range [begin, end) to `Type`. These routines have
+  drastic restrictions (e.g. allow no leading or trailing
+  whitespace) and are intended as an efficient back-end for more
+  tolerant routines.
+* `to<Type>(stringy)` converts `stringy` to `Type`. Value
+  `stringy` may be of type `const char*`, `StringPiece`,
+  `std::string`, or `fbstring`. (Technically, the requirement is
+  that `stringy` implicitly converts to a `StringPiece`.)
+* `to<Type>(&stringPiece)` parses with progress information:
+  given `stringPiece` of type `StringPiece`, it parses as much
+  as possible from it as type `Type`, and alters `stringPiece`
+  to remove the munched characters. This is easiest clarified
+  by an example:
+
+``` Cpp
+    fbstring s = " 1234 angels on a pin";
+    StringPiece pc(s);
+    auto x = to<int>(&pc);
+    assert(x == 1234);
+    assert(pc == " angels on a pin");
+```
+
+Note how the routine ate the leading space but not the trailing one.
+
+#### Parsing integral types
+
+Parsing integral types is unremarkable - decimal format is
+expected, an optional `'+'` or `'-'` sign is allowed for signed
+types, but no `'+'` is allowed for unsigned types. The one remarkable
+element is speed - parsing typical `long` values is 6x faster than
+`sscanf`. `folly/Conv.h` uses aggressive loop unrolling and
+table-assisted SIMD-style code arrangement that avoids integral
+division (slow) and data dependencies across operations
+(ILP-unfriendly). Example:
+
+``` Cpp
+    fbstring str = " 12345 ";
+    assert(to<int>(str) == 12345);
+    str = " 12345six seven eight";
+    StringPiece pc(str);
+    assert(to<int>(&pc) == 12345);
+    assert(pc == "six seven eight");
+```
+
+#### Parsing floating-point types
+
+`folly/Conv.h` uses, again, [V8's double-conversion](http://code.google.com/p/double-conversion/)
+routines as back-end.
The speed is 3x faster than `sscanf` and
+1.7x faster than in-house routines such as `parse<double>`. But
+the more important detail is accuracy - even if you do code a
+routine that works faster than `to<double>`, chances are it is
+incorrect and will fail in a variety of corner cases. Using
+`to<double>` is strongly recommended.
+
+Note that if an unparsable string is passed to `to<double>`, `NaN`
+is returned, which can be tested for as follows:
+
+``` Cpp
+    fbstring str = "not a double";
+    double d = to<double>(str);
+    if (std::isnan(d)) {
+      // string could not be parsed
+    }
+```
diff --git a/folly/docs/Dynamic.md b/folly/docs/Dynamic.md
new file mode 100644
index 00000000..9a2beee3
--- /dev/null
+++ b/folly/docs/Dynamic.md
@@ -0,0 +1,189 @@
+`folly/dynamic.h`
+-----------------
+
+`folly/dynamic.h` provides a runtime dynamically typed value for
+C++, similar to the way languages with runtime type systems work
+(e.g. Python). It can hold types from a predetermined set of types
+(ints, bools, arrays of other dynamics, etc), similar to something like
+`boost::variant`, but the syntax is intended to be a little more like
+using the native type directly.
+
+To use `dynamic`, you need to be using gcc 4.6 or later. You'll want to
+include `folly/dynamic.h` (or perhaps also `folly/json.h`).
+
+### Overview
+***
+
+Here are some code samples to get started (assumes a `using
+folly::dynamic;` was used):
+
+``` Cpp
+    dynamic twelve = 12; // creates a dynamic that holds an integer
+    dynamic str = "string"; // yep, this one is an fbstring
+
+    // A few other types.
+    dynamic nul = nullptr;
+    dynamic boolean = false;
+
+    // Arrays can be initialized with brackets.
+    dynamic array = { "array ", "of ", 4, " elements" };
+    assert(array.size() == 4);
+    dynamic emptyArray = {};
+    assert(emptyArray.empty());
+
+    // Maps from dynamics to dynamics are called objects. The
+    // dynamic::object constant is how you make an empty map from dynamics
+    // to dynamics.
+    dynamic map = dynamic::object;
+    map["something"] = 12;
+    map["another_something"] = map["something"] * 2;
+
+    // Dynamic objects may be initialized this way
+    dynamic map2 = dynamic::object("something", 12)("another_something", 24);
+```
+
+### Runtime Type Checking and Conversions
+***
+
+Any operation on a dynamic requires checking at runtime that the
+type is compatible with the operation. If it isn't, you'll get a
+`folly::TypeError`. Other exceptions can also be thrown if
+you try to do something impossible (e.g. if you put a very large
+64-bit integer in and try to read it out as a double).
+
+More examples should hopefully clarify this:
+
+``` Cpp
+    dynamic dint = 42;
+
+    dynamic str = "foo";
+    dynamic anotherStr = str + "something"; // fine
+    dynamic thisThrows = str + dint; // TypeError is raised
+```
+
+Explicit type conversions can be requested for some of the basic types:
+
+``` Cpp
+    dynamic dint = 12345678;
+    dynamic doub = dint.asDouble(); // doub will hold 12345678.0
+    dynamic str = dint.asString(); // str == "12345678"
+
+    dynamic hugeInt = std::numeric_limits<int64_t>::max();
+    dynamic hugeDoub = hugeInt.asDouble(); // throws a folly/Conv.h error,
+                                           // since it can't fit in a double
+```
+
+### Iteration and Lookup
+***
+
+You can iterate over dynamic arrays as you would over any C++ sequence container.
+
+``` Cpp
+    dynamic array = {2, 3, "foo"};
+
+    for (auto& val : array) {
+      doSomethingWith(val);
+    }
+```
+
+You can iterate over dynamic maps by calling `items()`, `keys()`, and
+`values()`, which behave similarly to the homonymous methods of Python
+dictionaries.
+
+``` Cpp
+    dynamic obj = dynamic::object(2, 3)("hello", "world")("x", 4);
+
+    for (auto& pair : obj.items()) {
+      // Key is pair.first, value is pair.second
+      processKey(pair.first);
+      processValue(pair.second);
+    }
+
+    for (auto& key : obj.keys()) {
+      processKey(key);
+    }
+
+    for (auto& value : obj.values()) {
+      processValue(value);
+    }
+```
+
+You can find an element by key in a dynamic map using the `find()` method,
+which returns an iterator compatible with `items()`:
+
+``` Cpp
+    dynamic obj = dynamic::object(2, 3)("hello", "world")("x", 4);
+
+    auto pos = obj.find("hello");
+    // pos->first is "hello"
+    // pos->second is "world"
+
+    auto pos2 = obj.find("no_such_key");
+    // pos2 == obj.items().end()
+```
+
+
+### Use for JSON
+***
+
+The original motivation for implementing this type was to try to
+make dealing with json documents in C++ almost as easy as it is
+in languages with dynamic type systems (php or javascript, etc).
+The reader can judge whether we're anywhere near that goal, but
+here's what it looks like:
+
+``` Cpp
+    // Parsing JSON strings and using them.
+    std::string jsonDocument = R"({"key":12,"key2":[false, null, true, "yay"]})";
+    dynamic parsed = folly::parseJson(jsonDocument);
+    assert(parsed["key"] == 12);
+    assert(parsed["key2"][0] == false);
+    assert(parsed["key2"][1] == nullptr);
+
+    // Building the same document programmatically.
+    dynamic sonOfAJ = dynamic::object
+      ("key", 12)
+      ("key2", { false, nullptr, true, "yay" });
+
+    // Printing. (See also folly::toPrettyJson)
+    auto str = folly::toJson(sonOfAJ);
+    assert(jsonDocument.compare(str) == 0);
+```
+
+### Performance
+***
+
+Dynamic typing is more expensive than static typing, even when
+you do it in C++. ;)
+
+However, some effort has been made to keep `folly::dynamic` and
+the json (de)serialization at least reasonably performant for
+common cases. The heap is only used for arrays and objects, and
+move construction is fully supported. String formatting
+internally also uses the highly performant `folly::to<>` (see
+`folly/Conv.h`).
+
+A trade-off to keep in mind, though, is that
+`sizeof(folly::dynamic)` is 64 bytes. You probably don't want to
+use it if you need to allocate large numbers of them (prefer
+static types, etc).
+
+### Some Design Rationale
+***
+
+**Q. Why is there no default constructor?**
+
+This is a bit of a limitation of `std::initializer_list<>` for
+this use case. The expression `dynamic d = {}` is required by the
+standard to call the default constructor if one exists (the
+reasoning for this makes sense, since `{}` is part of the concept
+of "uniform initialization", and is intended for use with things
+like `std::vector`). It would be surprising if this expression
+didn't leave `d.isArray()` true, but on the other hand it would
+also be surprising if `dynamic d` left `d.isArray()` as true. The
+solution was just to disallow uninitialized dynamics: every
+dynamic must start out being assigned to some value (or nullptr).
+
+**Q. Isn't this just a poor imitation of the C# language feature?**
+
+Pretty much.
diff --git a/folly/docs/FBString.md b/folly/docs/FBString.md
new file mode 100644
index 00000000..bfd69f1a
--- /dev/null
+++ b/folly/docs/FBString.md
@@ -0,0 +1,46 @@
+`folly/FBString.h`
+------------------
+
+`fbstring` is a drop-in replacement for `std::string`. The main
+benefit of `fbstring` is significantly increased performance on
+virtually all important primitives. This is achieved by using a
+three-tiered storage strategy and by cooperating with the memory
+allocator.
In particular, `fbstring` is designed to detect use of
+jemalloc and cooperate with it to achieve significant improvements in
+speed and memory usage.
+
+`fbstring` supports 32- and 64-bit architectures. Porting it to big-endian
+architectures would require some changes.
+
+### Storage strategies
+***
+
+* Small strings (<= 23 chars) are stored in-situ without memory
+  allocation.
+
+* Medium strings (24 - 255 chars) are stored in malloc-allocated
+  memory and copied eagerly.
+
+* Large strings (> 255 chars) are stored in malloc-allocated memory and
+  copied lazily.
+
+### Implementation highlights
+***
+
+* 100% compatible with `std::string`.
+
+* Thread-safe reference-counted copy-on-write for "large"
+  strings (> 255 chars).
+
+* Uses `malloc` instead of allocators.
+
+* Jemalloc-friendly. `fbstring` automatically detects if the application
+  uses jemalloc and, if so, significantly improves its allocation
+  strategy by using non-standard jemalloc extensions.
+
+* `find()` is implemented using a simplified Boyer-Moore
+  algorithm. Casual tests indicate a 30x speed improvement over
+  `string::find()` for successful searches and a 1.5x speed
+  improvement for failed searches.
+
+* Offers conversions to and from `std::string`.
\ No newline at end of file
diff --git a/folly/docs/FBVector.md b/folly/docs/FBVector.md
new file mode 100644
index 00000000..c340a398
--- /dev/null
+++ b/folly/docs/FBVector.md
@@ -0,0 +1,242 @@
+`folly/FBVector.h`
+------------------
+
+Simply replacing `std::vector` with `folly::fbvector` (after
+having included the `folly/FBVector.h` header file) will
+improve the performance of your C++ code using vectors with
+common coding patterns. The improvements are always non-negative,
+almost always measurable, frequently significant, sometimes
+dramatic, and occasionally spectacular.
+
+### Motivation
+***
+
+`std::vector` is the stalwart abstraction many use for
+dynamically-allocated arrays in C++. It is also the best known
+and most used of all containers. It may therefore seem a
+surprise that `std::vector` leaves important - and sometimes
+vital - efficiency opportunities on the table. This document
+explains how our own drop-in abstraction `fbvector` improves key
+performance aspects of `std::vector`. Refer to
+folly/test/FBVectorTest.cpp for a few benchmarks.
+
+### Memory Handling
+***
+
+It is well known that `std::vector` grows exponentially (at a
+constant factor) in order to avoid quadratic growth performance.
+The trick is choosing a good factor (any factor greater than 1
+ensures O(1) amortized append complexity towards infinity). A
+factor that's too small causes frequent vector reallocation; one
+that's too large forces the vector to consume much more memory
+than needed. The initial HP implementation by Stepanov used a
+growth factor of 2, i.e. whenever you'd `push_back` into a vector
+without there being room, it would double the current capacity.
+
+With time, other compilers reduced the growth factor to 1.5, but
+gcc has staunchly used a growth factor of 2. In fact it can be
+mathematically proven that a growth factor of 2 is rigorously the
+worst possible because it never allows the vector to reuse
+any of its previously-allocated memory. That makes the vector
+cache-unfriendly and memory-manager unfriendly.
+
+To see why that's the case, consider a large vector of capacity C
+residing somewhere at the beginning of an initially unoccupied
+chunk.
When the request for growth comes about, the vector
+(assuming no in-place resizing, see the appropriate section in
+this document) will allocate a chunk next to its current chunk,
+copy its existing data, and then deallocate the old chunk. So now
+we have a chunk of size C followed by a chunk of size k * C.
+Continuing this process, we'll then have a chunk of size k * k * C
+to the right and so on. That leads to a series of the form (using
+^^ for power):
+
+```
+    C, C*k, C*k^^2, C*k^^3, ...
+```
+
+If we choose k = 2 we know that every element in the series will
+be strictly larger than the sum of all previous ones because of
+the remarkable equality:
+
+```
+    1 + 2^^1 + 2^^2 + ... + 2^^n = 2^^(n+1) - 1
+```
+
+What that really means is that the new request for a chunk will
+never be satisfiable by coalescing all previously-used chunks.
+This is not quite what you'd want.
+
+We would of course want the vector to not crawl forward in
+memory, but instead to move back to its previously-allocated
+chunks. Any number smaller than 2 guarantees that you'll be able
+at some point to reuse the previous chunks. Going through the
+math reveals the equation:
+
+```
+    k^^n <= 1 + k + k^^2 + ... + k^^(n-2)
+```
+
+If some number n satisfies that equation, it means you can reuse
+memory after n reallocations. The graphical solver below reveals
+that choosing k = 1.5 (blue line) allows memory reuse after 4
+reallocations, choosing k = 1.45 (red line) allows memory reuse
+after 3 reallocations, and choosing k = 1.3 (black line) allows
+reuse after only 2 reallocations.
+
+![graphical solutions](./Fbvector--graphical_solutions.png)
+
+Of course, the above makes a number of simplifying assumptions
+about how the memory allocator works, but definitely you don't
+want to choose the theoretically absolute worst growth factor.
+`fbvector` uses a growth factor of 1.5. That does not impede good
+performance at small sizes because of the way `fbvector`
+cooperates with jemalloc (below).
+
+### The jemalloc Connection
+***
+
+Virtually all modern allocators allocate memory in fixed-size
+quanta that are chosen to minimize management overhead while at
+the same time offering good coverage at low slack. For example, an
+allocator may choose blocks of doubling size (32, 64, 128,
+256, ...) up to 4096, and then blocks of size multiples of a
+page up until 1MB, and then 512KB increments and so on.
+
+As discussed above, `std::vector` also needs to (re)allocate in
+quanta. The next quantum is usually defined in terms of the
+current size times the infamous growth constant. Because of this
+setup, `std::vector` has some slack memory at the end much like
+an allocated block has some slack memory at the end.
+
+It doesn't take a rocket surgeon to figure out that an allocator-
+aware `std::vector` would be a marriage made in heaven: the
+vector could directly request blocks of "perfect" size from the
+allocator so there would be virtually no slack in the allocator.
+Also, the entire growth strategy could be adjusted to work
+perfectly with allocator's own block growth strategy. That's
+exactly what `fbvector` does - it automatically detects the use
+of jemalloc and adjusts its reallocation strategy accordingly.
+
+But wait, there's more. Many memory allocators do not support
+in-place reallocation, although most of them could. This comes from
+the now notorious design of `realloc()` to opaquely perform
+either in-place reallocation or an allocate-memcpy-deallocate
+cycle.
Such lack of control subsequently forced all clib-based
+allocator designs to avoid in-place reallocation, and that
+includes C++'s `new` and `std::allocator`. This is a major loss of
+efficiency because an in-place reallocation, being very cheap,
+may mean a much less aggressive growth strategy. In turn that
+means less slack memory and faster reallocations.
+
+### Object Relocation
+***
+
+One particularly sensitive topic about handling C++ values is
+that they are all conservatively considered non-relocatable.
+In contrast, a relocatable value would preserve
+its invariant even if its bits were moved arbitrarily in memory.
+For example, an `int32` is relocatable because moving its 4 bytes
+would preserve its actual value, so the address of that value
+does not "matter" to its integrity.
+
+C++'s assumption of non-relocatable values hurts everybody for
+the benefit of a few questionable designs. The issue is that
+moving a C++ object "by the book" entails (a) creating a new copy
+from the existing value; (b) destroying the old value. This is
+quite vexing and violates common sense; consider this
+hypothetical conversation between Captain Picard and an
+incredulous alien:
+
+Incredulous Alien: "So, this teleporter, how does it work?" <br/>
+Picard: "It beams people and arbitrary matter from one place to
+another." <br/>
+Incredulous Alien: "Hmmm... is it safe?" <br/>
+Picard: "Yes, but earlier models were a hassle. They'd clone the
+person to another location. Then the teleporting chief would have
+to shoot the original. Ask O'Brien, he was an intern during those
+times. A bloody mess, that's what it was."
+
+Only a tiny minority of objects are genuinely non-relocatable:
+
+* Objects that use internal pointers, e.g.:
+
+``` Cpp
+    class Ew {
+      char buffer[1024];
+      char * pointerInsideBuffer;
+     public:
+      Ew() : pointerInsideBuffer(buffer) {}
+      ...
+    };
+```
+
+* Objects that need to update "observers" that store pointers to them.
+
+The first class of designs can always be redone at small or no
+cost in efficiency. The second class of objects should not be
+values in the first place - they should be allocated with `new`
+and manipulated using (smart) pointers. It is highly unusual for
+a value to have observers that alias pointers to it.
+
+Relocatable objects are of high interest to `std::vector` because
+such knowledge makes insertion into the vector and vector
+reallocation considerably faster: instead of going to Picard's
+copy-destroy cycle, relocatable objects can be moved around
+simply by using `memcpy` or `memmove`. This optimization can
+yield arbitrarily high wins in efficiency; for example, it
+transforms `vector< vector<double> >` or `vector< hash_map<int,
+string> >` from risky liabilities into highly workable
+compositions.
+
+In order to allow fast relocation without risk, `fbvector` uses a
+trait `folly::IsRelocatable<T>` defined in `"folly/Traits.h"`. By default,
+`folly::IsRelocatable<T>::value` conservatively yields false. If
+you know that your type `Widget` is in fact relocatable, go right
+after `Widget`'s definition and write this:
+
+``` Cpp
+    // at global namespace level
+    namespace folly {
+      template <>
+      struct IsRelocatable<Widget> : boost::true_type {};
+    }
+```
+
+If you don't do this, `fbvector<Widget>` will fail to compile
+with a `BOOST_STATIC_ASSERT`.
+
+#### Additional Constraints
+
+Similar improvements are possible in the presence of a "simple" type
+- more specifically, one that has a trivial assignment (i.e.
+assignment is the same as bitblitting the bits over) or a nothrow
+default constructor. These traits are used gainfully by
+`fbvector` in a variety of places. Fortunately, these traits are
+already present in the C++ standard (well, currently in Boost).
+To summarize, in order to work with `fbvector`, a type `Widget`
+must pass:
+
+``` Cpp
+    BOOST_STATIC_ASSERT(
+      IsRelocatable<Widget>::value &&
+      (boost::has_trivial_assign<Widget>::value ||
+       boost::has_nothrow_constructor<Widget>::value));
+```
+
+These traits go hand in hand; for example, it would be very
+difficult to design a class that satisfies one branch of the
+conjunction above but not the other. `fbvector` uses these simple
+constraints to minimize the number of copies made on many common
+operations such as `push_back`, `insert`, or `resize`.
+
+To make it easy for you to state assumptions about a given type
+or family of parameterized types, check Traits.h and in
+particular the handy family of macros FOLLY_ASSUME_FBVECTOR_COMPATIBLE*
+(a usage sketch appears at the end of this document).
+
+### Miscellaneous
+***
+
+`fbvector` uses a careful implementation all around to make
+sure it doesn't lose efficiency through the cracks. Some future
+directions may be in improving raw memory copying (`memcpy` is
+not an intrinsic in gcc and does not work terribly well for
+large chunks) and in furthering the collaboration with
+jemalloc. Have fun!
diff --git a/folly/docs/Fbvector--graphical_solutions.png b/folly/docs/Fbvector--graphical_solutions.png
new file mode 100644
index 0000000000000000000000000000000000000000..c4ac68506baf7d0813686312314e525efcf55d50
GIT binary patch
literal 15295
[15295 bytes of binary PNG data elided - the figure plots graphical solutions
of k^^n <= 1 + k + ... + k^^(n-2) for k = 1.5 (blue), k = 1.45 (red), and
k = 1.3 (black), as referenced from FBVector.md]

diff --git a/folly/docs/Format.md b/folly/docs/Format.md
new file mode 100644
--- /dev/null
+++ b/folly/docs/Format.md
+`folly/Format.h`
+----------------
+
+`folly/Format.h` provides a fast, powerful, type-safe, flexible facility
+for formatting text, using a specification language similar to Python's
+[str.format](http://docs.python.org/library/string.html#formatstrings).
+
+### Overview
+***
+
+Here are some code samples to get started:
+
+``` Cpp
+using folly::format;
+using folly::vformat;
+
+// format() returns an object that can be streamed directly or
+// converted to a string
+std::cout << format("The answers are {} and {}", 23, 42);
+// => "The answers are 23 and 42"
+
+// To insert a literal '{' or '}', just double it.
+std::cout << format("{} {{}} {{{}}}", 23, 42); +// => "23 {} {42}" + +// Arguments can be referenced out of order, even multiple times +std::cout << format("The answers are {1}, {0}, and {1} again", 23, 42); +// => "The answers are 42, 23, and 42 again" + +// It's perfectly fine to not reference all arguments +std::cout << format("The only answer is {1}", 23, 42); +// => "The only answer is 42" + +// Values can be extracted from indexable containers +// (random-access sequences and integral-keyed maps), and also from +// string-keyed maps +std::vector v {23, 42}; +std::map m { {"what", "answer"} }; +std::cout << format("The only {1[what]} is {0[1]}", v, m); +// => "The only answer is 42" + +// If you only have one container argument, vformat makes the syntax simpler +std::map m { {"what", "answer"}, {"value", "42"} }; +std::cout << vformat("The only {what} is {value}", m); +// => "The only answer is 42" +// same as +std::cout << format("The only {0[what]} is {0[value]}", m); +// => "The only answer is 42" + +// {} works for vformat too +std::vector v {42, 23}; +std::cout << vformat("{} {}", v); +// => "42 23" + +// format and vformat work with pairs and tuples +std::tuple t {42, "hello", 23}; +std::cout << vformat("{0} {2} {1}", t); +// => "42 23 hello" + +// Format supports width, alignment, arbitrary fill, and various +// format specifiers, with meanings similar to printf +// "X<10": fill with 'X', left-align ('<'), width 10 +std::cout << format("{:X<10} {}", "hello", "world"); +// => "helloXXXXX world" + +// Format supports printf-style format specifiers +std::cout << format("{0:05d} decimal = {0:04x} hex", 42); +// => "00042 decimal = 002a hex" + +// Formatter objects may be written to a string using folly::to or +// folly::toAppend (see folly/Conv.h), or by calling their appendTo(), +// str(), and fbstr() methods +std::string s = format("The only answer is {}", 42).str(); +std::cout << s; +// => "The only answer is 42" +``` + + +### Format string syntax +*** + +Format string (`format`): +`"{" [arg_index] ["[" key "]"] [":" format_spec] "}"` + +- `arg_index`: index of argument to format; default = next argument. Note + that a format string may have either default argument indexes or + non-default argument indexes, but not both (to avoid confusion). +- `key`: if the argument is a container (C-style array or pointer, + `std::array`, vector, deque, map), you may use this + to select the element to format; works with random-access sequences and + integer- and string-keyed maps. Multiple level keys work as well, with + components separated with "."; for example, given + `map> m`, `{[foo.bar]}` selects + `m["foo"]["bar"]`. +- `format_spec`: format specification, see below + +Format string (`vformat`): +`"{" [ key ] [":" format_spec] "}"` + +- `key`: select the argument to format from the container argument; + works with random-access sequences and integer- and string-keyed maps. + Multiple level keys work as well, with components separated with "."; for + example, given `map> m`, `{foo.bar}` selects + `m["foo"]["bar"]`. +- `format_spec`: format specification, see below + +Format specification: +`[[fill] align] [sign] ["#"] ["0"] [width] [","] ["." 
precision] [type]` + +- `fill` (may only be specified if `align` is also specified): pad with this + character (' ' (space) or '`0`' (zero) might be useful; space is default) +- `align`: one of '`<`', '`>`', '`=`', '`^`': + - '`<`': left-align (default for most objects) + - '`>`': right-align (default for numbers) + - '`=`': pad after sign, but before significant digits; used to print + `-0000120`; only valid for numbers + - '`^`': center +- `sign`: one of '`+`', '`-`', ' ' (space) (only valid for numbers) + - '`+`': output '`+`' if positive or zero, '`-`' if negative + - '`-`': output '`-`' if negative, nothing otherwise (default) + - ' ' (space): output ' ' (space) if positive or zero, '`-`' if negative +- '`#`': output base prefix (`0` for octal, `0b` or `0B` for binary, `0x` or + `0X` for hexadecimal; only valid for integers) +- '`0`': 0-pad after sign, same as specifying "`0=`" as the `fill` and + `align` parameters (only valid for numbers) +- `width`: minimum field width +- '`,`' (comma): output comma as thousands' separator (only valid for integers, + and only for decimal output) +- `precision` (not allowed for integers): + - for floating point values, number of digits after decimal point ('`f`' or + '`F`' presentation) or number of significant digits ('`g`' or '`G`') + - for others, maximum field size (truncate subsequent characters) +- `type`: presentation format, see below + +Presentation formats: + +- Strings (`folly::StringPiece`, `std::string`, `folly::fbstring`, + `const char*`): + - '`s`' (default) +- Integers: + - '`b`': output in binary (base 2) ("`0b`" prefix if '`#`' specified) + - '`B`': output in binary (base 2) ("`0B`" prefix if '`#`' specified) + - '`c`': output as a character (cast to `char`) + - '`d`': output in decimal (base 10) (default) + - '`o`': output in octal (base 8) + - '`O`': output in octal (base 8) (same as '`o`') + - '`x`': output in hexadecimal (base 16) (lower-case digits above 9) + - '`X`': output in hexadecimal (base 16) (upper-case digits above 9) + - '`n`': locale-aware output (currently same as '`d`') +- `bool`: + - default: output "`true`" or "`false`" as strings + - integer presentations allowed as well +- `char`: + - same as other integers, but default is '`c`' instead of '`d`' +- Floating point (`float`, `double`; `long double` is not implemented): + - '`e`': scientific notation using '`e`' as exponent character + - '`E`': scientific notation using '`E`' as exponent character + - '`f'`: fixed point + - '`F'`: fixed point (same as '`f`') + - '`g`': general; use either '`f`' or '`e`' depending on magnitude (default) + - '`G`': general; use either '`f`' or '`E`' depending on magnitude + - '`n`': locale-aware version of '`g`' (currently same as '`g`') + - '`%`': percentage: multiply by 100 then display as '`f`' + + +### Extension +*** + +You can extend Formatter for your own class by providing a specialization for +`folly::FormatValue`. See `folly/Format.h` and `folly/FormatArg.h` for +details, and the existing specialization for `folly::dynamic` in +`folly/dynamic-inl.h` for an implementation example. 
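+
+As an illustration, a specialization for a hypothetical `Point` struct might
+look roughly like the sketch below (`Point` and the way it is rendered are
+assumptions made up for this example; check `folly/Format-inl.h` for the
+exact callback signatures used by the built-in specializations):
+
+``` Cpp
+struct Point { int x, y; };  // hypothetical user-defined type
+
+namespace folly {
+template <>
+class FormatValue<Point> {
+ public:
+  explicit FormatValue(const Point& p) : p_(p) {}
+
+  // Render the point as text, then delegate to the existing fbstring
+  // specialization so that width, fill, and alignment still apply.
+  template <class FormatCallback>
+  void format(FormatArg& arg, FormatCallback& cb) const {
+    fbstring s = to<fbstring>('(', p_.x, ", ", p_.y, ')');
+    FormatValue<fbstring>(s).format(arg, cb);
+  }
+
+ private:
+  const Point& p_;
+};
+}  // namespace folly
+
+// format("{}", Point{23, 42}) would then yield "(23, 42)"
+```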
+
diff --git a/folly/docs/GroupVarint.md b/folly/docs/GroupVarint.md
new file mode 100644
index 00000000..c490c610
--- /dev/null
+++ b/folly/docs/GroupVarint.md
@@ -0,0 +1,46 @@
+`folly/GroupVarint.h`
+---------------------
+
+`folly/GroupVarint.h` is an implementation of variable-length encoding for 32-
+and 64-bit integers using the Group Varint encoding scheme as described in
+Jeff Dean's [WSDM 2009 talk][wsdm] and in [Information Retrieval: Implementing
+and Evaluating Search Engines][irbook].
+
+[wsdm]: http://research.google.com/people/jeff/WSDM09-keynote.pdf
+[irbook]: http://www.ir.uwaterloo.ca/book/addenda-06-index-compression.html
+
+Briefly, a group of four 32-bit integers is encoded as a sequence of variable
+length, between 5 and 17 bytes; the first byte encodes the length (in bytes)
+of each integer in the group. A group of five 64-bit integers is encoded as a
+sequence of variable length, between 7 and 42 bytes; the first two bytes
+encode the length (in bytes) of each integer in the group.
+
+`GroupVarint.h` defines a few classes:
+
+* `GroupVarint<T>`, where `T` is `uint32_t` or `uint64_t`:
+
+  Basic encoding / decoding interface, mainly aimed at encoding / decoding
+  one group at a time.
+
+* `GroupVarintEncoder<T, Output>`, where `T` is `uint32_t` or `uint64_t`,
+  and `Output` is a functor that accepts `StringPiece` objects as arguments:
+
+  Streaming encoder: add values one at a time, and they will be
+  flushed to the output one group at a time. Handles the case where
+  the last group is incomplete (the number of integers to encode isn't
+  a multiple of the group size).
+
+* `GroupVarintDecoder<T>`, where `T` is `uint32_t` or `uint64_t`:
+
+  Streaming decoder: extract values one at a time. Handles the case where
+  the last group is incomplete.
+
+The 32-bit implementation is significantly faster than the 64-bit
+implementation; on platforms supporting the SSSE3 instruction set, we
+use the PSHUFB instruction to speed up lookup, as described in [SIMD-Based
+Decoding of Posting Lists][cikmpaper] (CIKM 2011).
+
+[cikmpaper]: http://www.stepanovpapers.com/CIKM_2011.pdf
+
+For more details, see the header file `folly/GroupVarint.h` and the
+associated test file `folly/test/GroupVarintTest.cpp`.
diff --git a/folly/docs/Histogram.md b/folly/docs/Histogram.md
new file mode 100644
index 00000000..03bc3e81
--- /dev/null
+++ b/folly/docs/Histogram.md
@@ -0,0 +1,104 @@
+`folly/Histogram.h`
+-------------------
+
+### Classes
+***
+
+#### `Histogram`
+
+`Histogram.h` defines a simple histogram class, templated on the type of data
+you want to store. This class is useful for tracking a large stream of data
+points, where you want to remember the overall distribution of the data, but do
+not need to remember each data point individually.
+
+Each histogram bucket stores the number of data points that fell in the bucket,
+as well as the overall sum of the data points in the bucket. Note that no
+overflow checking is performed, so if you have a bucket with a large number of
+very large values, it may overflow and cause inaccurate data for this bucket.
+As such, the histogram class is not well suited to storing data points with
+very large values. However, it works very well for smaller data points such as
+request latencies, request or response sizes, etc.
+
+In addition to providing access to the raw bucket data, the `Histogram` class
+also provides methods for estimating percentile values. This allows you to
+estimate the median value (the 50th percentile) and other values such as the
+95th or 99th percentiles.
+
+All of the buckets have the same width. The number of buckets and bucket width
+is fixed for the lifetime of the histogram. As such, you do need to know your
+expected data range ahead of time in order to have accurate statistics. The
+histogram does keep one bucket to store all data points that fall below the
+histogram minimum, and one bucket for the data points above the maximum.
+However, because these buckets don't have a good lower/upper bound, percentile
+estimates in these buckets may be inaccurate.
+
+#### `HistogramBuckets`
+
+The `Histogram` class is built on top of `HistogramBuckets`.
+`HistogramBuckets` provides an API very similar to `Histogram`, but allows a
+user-defined bucket class. This allows users to implement more complex
+histogram types that store more than just the count and sum in each bucket.
+
+When computing percentile estimates `HistogramBuckets` allows user-defined
+functions for computing the average value and data count in each bucket. This
+allows you to define more complex buckets which may have multiple different
+ways of computing the average value and the count.
+
+For example, one use case could be tracking timeseries data in each bucket.
+Each set of timeseries data can have independent data in the bucket, which can
+show how the data distribution is changing over time.
+
+### Example Usage
+***
+
+Say we have code that sends many requests to remote services, and want to
+generate a histogram showing how long the requests take. The following code
+will initialize a histogram with 50 buckets, tracking values between 0 and
+5000. (There are 50 buckets since the bucket width is specified as 100. If
+the histogram range is not an even multiple of the bucket width, the last
+bucket will simply be shorter than the others.)
+
+``` Cpp
+  folly::Histogram<int64_t> latencies(100, 0, 5000);
+```
+
+The `addValue()` method is used to add values to the histogram. Each time a
+request finishes we can add its latency to the histogram:
+
+``` Cpp
+  latencies.addValue(now - startTime);
+```
+
+You can access each of the histogram buckets to display the overall
+distribution. Note that bucket 0 tracks all data points that were below the
+specified histogram minimum, and the last bucket tracks the data points that
+were above the maximum.
+
+``` Cpp
+  unsigned int numBuckets = latencies.getNumBuckets();
+  cout << "Below min: " << latencies.getBucketByIndex(0).count << "\n";
+  for (unsigned int n = 1; n < numBuckets - 1; ++n) {
+    cout << latencies.getBucketMin(n) << "-" << latencies.getBucketMax(n)
+         << ": " << latencies.getBucketByIndex(n).count << "\n";
+  }
+  cout << "Above max: "
+       << latencies.getBucketByIndex(numBuckets - 1).count << "\n";
+```
+
+You can also use the `getPercentileEstimate()` method to estimate the value at
+the Nth percentile in the distribution. For example, to estimate the median,
+as well as the 95th and 99th percentile values:
+
+``` Cpp
+  int64_t median = latencies.getPercentileEstimate(0.5);
+  int64_t p95 = latencies.getPercentileEstimate(0.95);
+  int64_t p99 = latencies.getPercentileEstimate(0.99);
+```
+
+### Thread Safety
+***
+
+Note that `Histogram` and `HistogramBuckets` objects are not thread-safe. If
+you wish to access a single `Histogram` from multiple threads, you must perform
+your own locking to ensure that multiple threads do not access it at the same
+time.
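+
+For instance, a minimal wrapper along the following lines could pair a
+histogram with a `std::mutex` (`LockedHistogram` is a hypothetical name, not
+part of the library; the bucket parameters are the ones from the example
+above):
+
+``` Cpp
+  class LockedHistogram {
+   public:
+    LockedHistogram() : hist_(100, 0, 5000) {}
+
+    // All writers serialize on the mutex.
+    void addValue(int64_t value) {
+      std::lock_guard<std::mutex> g(mutex_);
+      hist_.addValue(value);
+    }
+
+    // Readers take the same lock, since percentile estimation
+    // walks buckets that writers may be concurrently modifying.
+    int64_t getPercentileEstimate(double pct) const {
+      std::lock_guard<std::mutex> g(mutex_);
+      return hist_.getPercentileEstimate(pct);
+    }
+
+   private:
+    mutable std::mutex mutex_;
+    folly::Histogram<int64_t> hist_;
+  };
+```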
diff --git a/folly/docs/Makefile b/folly/docs/Makefile new file mode 100644 index 00000000..97be3da9 --- /dev/null +++ b/folly/docs/Makefile @@ -0,0 +1,33 @@ +SOURCES=$(wildcard *.md) +PDF=$(SOURCES:%.md=%.pdf) +HTML=$(SOURCES:%.md=%.html) +INSTALL=install -c -m 644 +PYTHON=python +PANDOCARGS=-s +PANDOC=/usr/bin/pandoc + +export LANGUAGE=C +export LC_ALL=C + +all: html index.html + +pdf: $(PDF) + +html: $(HTML) + +# This needs pandoc 1.9 or later to work +%.pdf: %.md + $(PANDOC) -f markdown -o $*.pdf $*.md + +%.html: %.md style.css + $(PANDOC) $(PANDOCARGS) -H style.css -f markdown -t html --toc -o $*.html $*.md + +docs.md: $(SOURCES) style.css + $(PANDOC) $(PANDOCARGS) -H style.css -f markdown -t markdown --toc -o $@ *.md + +index.html: $(SOURCES) style.css + $(PANDOC) $(PANDOCARGS) -H style.css -f markdown -t html --toc -o $@ *.md + + +clean: + $(RM) $(PDF) $(HTML) index.html diff --git a/folly/docs/Overview.md b/folly/docs/Overview.md new file mode 100644 index 00000000..106b385e --- /dev/null +++ b/folly/docs/Overview.md @@ -0,0 +1,259 @@ +`folly/` +------ + +### Introduction + +Folly (acronymed loosely after Facebook Open Source Library) is a +library of C++11 components designed with practicality and efficiency +in mind. It complements (as opposed to competing against) offerings +such as Boost and of course `std`. In fact, we embark on defining our +own component only when something we need is either not available, or +does not meet the needed performance profile. + +Performance concerns permeate much of Folly, sometimes leading to +designs that are more idiosyncratic than they would otherwise be (see +e.g. `PackedSyncPtr.h`, `SmallLocks.h`). Good performance at large +scale is a unifying theme in all of Folly. + +### Logical Design + +Folly is a collection of relatively independent components, some as +simple as a few symbols. There is no restriction on internal +dependencies, meaning that a given folly module may use any other +folly components. + +All symbols are defined in the top-level namespace `folly`, except of +course macros. Macro names are ALL_UPPERCASE. Namespace `folly` +defines other internal namespaces such as `internal` or `detail`. User +code should not depend on symbols in those namespaces. + +### Physical Design + +At the top level Folly uses the classic "stuttering" scheme +`folly/folly` used by Boost and others. The first directory serves as +an installation root of the library (with possible versioning a la +`folly-1.0/`), and the second is to distinguish the library when +including files, e.g. `#include "folly/FBString.h"`. + +The directory structure is flat (mimicking the namespace structure), +i.e. we don't have an elaborate directory hierarchy (it is possible +this will change in future versions). The subdirectory `experimental` +contains files that are used inside folly and possibly at Facebook but +not considered stable enough for client use. Your code should not use +files in `folly/experimental` lest it may break when you update Folly. + +The `folly/folly/test` subdirectory includes the unittests for all +components, usually named `ComponentXyzTest.cpp` for each +`ComponentXyz.*`. The `folly/folly/docs` directory contains +documentation. + +### Compatibility + +Currently, `folly` has been tested on gcc 4.6 on 64-bit installations +of Fedora 17, Ubuntu 12.04, and Debian wheezy. It might work unmodified +on other 64-bit Linux platforms. + +### Components + +Below is a list of Folly components in alphabetical order, along with +a brief description of each. 
+
+#### `Arena.h`, `ThreadCachedArena.h`
+
+Simple arena for memory allocation: multiple allocations get freed all
+at once. With threaded version.
+
+#### [`AtomicHashMap.h`, `AtomicHashArray.h`](AtomicHashMap.md)
+
+High-performance atomic hash map with almost lock-free operation.
+
+#### [`Benchmark.h`](Benchmark.md)
+
+A small framework for benchmarking code. Client code registers
+benchmarks, optionally with an argument that dictates the scale of the
+benchmark (iterations, working set size etc). The framework runs
+benchmarks (subject to a command-line flag) and produces formatted
+output with timing information.
+
+#### `Bits.h`
+
+Various bit manipulation utilities optimized for speed, including
+bit-twiddling functions that wrap the
+[ffsl(l)](http://linux.die.net/man/3/ffsll) primitives in a uniform
+interface.
+
+#### `ConcurrentSkipList.h`
+
+An implementation of the structure described in [A Provably Correct
+Scalable Concurrent Skip
+List](http://www.cs.tau.ac.il/~shanir/nir-pubs-web/Papers/OPODIS2006-BA.pdf)
+by Herlihy et al.
+
+#### [`Conv.h`](Conv.md)
+
+A variety of data conversion routines (notably to and from string),
+optimized for speed and safety.
+
+#### `DiscriminatedPtr.h`
+
+Similar to `boost::variant`, but restricted to pointers only. Uses the
+highest-order unused 16 bits in a pointer as discriminator. So
+`sizeof(DiscriminatedPtr<...>) == sizeof(void*)`.
+
+#### [`dynamic.h`](Dynamic.md)
+
+Dynamically-typed object, created with JSON objects in mind.
+
+#### `Endian.h`
+
+Endian conversion primitives.
+
+#### `Escape.h`
+
+Escapes a string in C style.
+
+#### `eventfd.h`
+
+Wrapper around the
+[`eventfd`](http://www.kernel.org/doc/man-pages/online/pages/man2/eventfd.2.html)
+system call.
+
+#### [`FBString.h`](FBString.md)
+
+A drop-in implementation of `std::string` with a variety of optimizations.
+
+#### [`FBVector.h`](FBVector.md)
+
+A mostly drop-in implementation of `std::vector` with a variety of
+optimizations.
+
+#### `Foreach.h`
+
+Pseudo-statements (implemented as macros) for iteration.
+
+#### [`Format.h`](Format.md)
+
+Python-style formatting utilities.
+
+#### [`GroupVarint.h`](GroupVarint.md)
+
+[Group Varint
+encoding](http://www.ir.uwaterloo.ca/book/addenda-06-index-compression.html)
+for 32- and 64-bit values.
+
+#### `Hash.h`
+
+Various popular hash function implementations.
+
+#### [`Histogram.h`](Histogram.md)
+
+A simple class for collecting histogram data.
+
+#### `IntrusiveList.h`
+
+Convenience type definitions for using `boost::intrusive::list`.
+
+#### `json.h`
+
+JSON serializer and deserializer. Uses `dynamic.h`.
+
+#### `Likely.h`
+
+Wrappers around
+[`__builtin_expect`](http://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html).
+
+#### `Malloc.h`
+
+Memory allocation helpers, particularly when using jemalloc.
+
+#### `MapUtil.h`
+
+Helpers for finding items in associative containers (such as
+`std::map` and `std::unordered_map`).
+
+#### [`PackedSyncPtr.h`](PackedSyncPtr.md)
+
+A highly specialized data structure consisting of a pointer, a 1-bit
+spin lock, and a 15-bit integral, all inside one 64-bit word.
+
+#### `Preprocessor.h`
+
+Necessarily evil stuff.
+
+#### `PrettyPrint.h`
+
+Pretty-printer for numbers that appends suffixes of unit used: bytes
+(kb, MB, ...), metric suffixes (k, M, G, ...), and time (s, ms, us,
+ns, ...).
+
+#### [`ProducerConsumerQueue.h`](ProducerConsumerQueue.md)
+
+Lock free single-reader, single-writer queue.
+
+#### `Random.h`
+
+Defines only one function---`randomNumberSeed()`.
+
+#### `Range.h`
+
+Boost-style range facility and the `StringPiece` specialization.
+
+#### `RWSpinLock.h`
+
+Fast and compact reader-writer spin lock.
+
+#### `ScopeGuard.h`
+
+C++11 incarnation of the old [ScopeGuard](http://drdobbs.com/184403758) idiom.
+
+#### [`SmallLocks.h`](SmallLocks.md)
+
+Very small spin locks (1 byte and 1 bit).
+
+#### `small_vector.h`
+
+Vector with the small buffer optimization and an optional embedded
+`PicoSpinLock`.
+
+#### `sorted_vector_types.h`
+
+Collections similar to `std::map` but implemented as sorted vectors.
+
+#### `StlAllocator.h`
+
+STL allocator wrapping a simple allocate/deallocate interface.
+
+#### `String.h`
+
+String utilities that connect `folly::fbstring` with `std::string`.
+
+#### [`Synchronized.h`](Synchronized.md)
+
+High-level synchronization library.
+
+#### `System.h`
+
+Demangling and errno utilities.
+
+#### [`ThreadCachedInt.h`](ThreadCachedInt.md)
+
+High-performance atomic increment using thread caching.
+
+#### [`ThreadLocal.h`](ThreadLocal.md)
+
+Improved thread local storage for non-trivial types.
+
+#### `TimeoutQueue.h`
+
+Queue with per-item timeout.
+
+#### `Traits.h`
+
+Type traits that complement those defined in the standard C++11 header
+`<type_traits>`.
+
+#### `Unicode.h`
+
+Defines the `codePointToUtf8` function.
diff --git a/folly/docs/PackedSyncPtr.md b/folly/docs/PackedSyncPtr.md
new file mode 100644
index 00000000..b77b84e9
--- /dev/null
+++ b/folly/docs/PackedSyncPtr.md
@@ -0,0 +1,77 @@
+`folly/PackedSyncPtr.h`
+----------------------
+
+A highly specialized data structure consisting of a pointer, a 1-bit
+spin lock, and a 15-bit integral packed into `sizeof(void*)`.
+
+Typical application is for microsharding of many elements within containers.
+Because there is no memory overhead, an arbitrarily large number of locks can be
+used to minimize lock contention with no memory penalty. Additionally,
+excellent cache performance is obtained by storing the lock inline with the
+pointer (no additional cache miss or false sharing). Finally, because it uses a
+simple spinlock mechanism, the cost of acquiring an uncontended lock is minimal.
+
+### Usage
+***
+
+This is not a "smart" pointer: nothing automagical is going on
+here. Locking is up to the user. Resource deallocation is up to
+the user. Locks are never acquired or released outside explicit
+calls to lock() and unlock().
+
+Change the value of the raw pointer with set(), but you must hold
+the lock when calling this function if multiple threads could be
+using it.
+
+Here is an example of using a PackedSyncPtr to build a synchronized vector with
+no memory overhead - the spinlock and size are stored in the 16 unused bits of
+the pointer, the rest of which points to the actual data. See
+`folly/small_vector.h` for a complete implementation of this concept.
+
+``` Cpp
+  template <typename T>
+  class SyncVec {
+    PackedSyncPtr<T> base;
+
+   public:
+    SyncVec() { base.init(); }
+
+    void push_back(const T& t) {
+      base.set(
+        static_cast<T*>(realloc(base.get(), (base.extra() + 1) * sizeof(T))));
+      base[base.extra()] = t;
+      base.setExtra(base.extra() + 1);
+    }
+
+    size_t size() const {
+      return base.extra();
+    }
+
+    void lock() {
+      base.lock();
+    }
+
+    void unlock() {
+      base.unlock();
+    }
+
+    T* begin() const {
+      return base.get();
+    }
+
+    T* end() const {
+      return base.get() + base.extra();
+    }
+  };
+```
+
+### Implementation
+***
+
+This is using an x64-specific detail about the effective virtual
+address space.
+Long story short: the upper two bytes of all our
+pointers will be zero in reality---and if you have a couple billion
+such pointers in core, it makes pretty good sense to try to make
+use of that memory. The exact details can be perused here:
+
+[http://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses](http://en.wikipedia.org/wiki/X86-64#Canonical_form_addresses)
diff --git a/folly/docs/ProducerConsumerQueue.md b/folly/docs/ProducerConsumerQueue.md
new file mode 100644
index 00000000..6dc0344b
--- /dev/null
+++ b/folly/docs/ProducerConsumerQueue.md
@@ -0,0 +1,40 @@
+`folly/ProducerConsumerQueue.h`
+-------------------------------
+
+The `folly::ProducerConsumerQueue` class is a one-producer
+one-consumer queue with very low synchronization overhead.
+
+The queue must be created with a fixed maximum size (and allocates
+that many cells of `sizeof(T)`), and it provides just three simple
+operations: read, write, and isFull. All of these operations are
+wait-free. The read and write operations must only be called by the
+reader and writer thread, respectively, but isFull is accessible to
+both.
+
+A write may fail if the queue is full, and a read may fail if the
+queue is empty, so in many situations it is important to choose the
+queue size such that the queue filling up for long is unlikely.
+
+### Example
+***
+
+A toy example that doesn't really do anything useful:
+
+``` Cpp
+  folly::ProducerConsumerQueue<folly::fbstring> queue;
+
+  std::thread reader([&queue] {
+    for (;;) {
+      folly::fbstring str;
+      while (!queue.read(str)) continue;
+
+      sink(str);
+    }
+  });
+
+  // producer thread:
+  for (;;) {
+    folly::fbstring str = source();
+    while (!queue.write(str)) continue;
+  }
+```
diff --git a/folly/docs/SmallLocks.md b/folly/docs/SmallLocks.md
new file mode 100644
index 00000000..f1ee4ee3
--- /dev/null
+++ b/folly/docs/SmallLocks.md
@@ -0,0 +1,24 @@
+`folly/SmallLocks.h`
+--------------------
+
+This module is currently x64 only.
+
+This header defines two very small mutex types. These are useful in
+highly memory-constrained environments where contention is unlikely.
+The purpose of these is to allow fine-grained locking in massive data
+structures where memory is at a premium. Often, each record may have
+a spare bit or byte lying around, so sometimes these can be tacked on
+with no additional memory cost.
+
+There are two types exported from this header. `MicroSpinLock` is a
+single byte lock, and `PicoSpinLock` can be wrapped around an
+integer to use a single bit as a lock. Why do we have both?
+Because you can't use x64 `bts` on a single byte, so
+`sizeof(MicroSpinLock)` is smaller than `sizeof(PicoSpinLock)` can
+be, giving it some use cases.
+
+Both the locks in this header model the C++11 Lockable concept. So
+you can use `std::lock_guard` or `std::unique_lock` to lock them in an
+RAII way if you want.
+
+Additional information is in the header.
diff --git a/folly/docs/Synchronized.md b/folly/docs/Synchronized.md
new file mode 100644
index 00000000..5e719037
--- /dev/null
+++ b/folly/docs/Synchronized.md
@@ -0,0 +1,606 @@
+`folly/Synchronized.h`
+----------------------
+
+`folly/Synchronized.h` introduces a simple abstraction for mutex-
+based concurrency. It replaces convoluted, unwieldy, and just
+plain wrong code with simple constructs that are easy to get
+right and difficult to get wrong.
+
+### Motivation
+
+Many of our multithreaded Thrift services (not to mention general
+concurrent C++ code) use shared data structures associated with
+locks.
+This follows the time-honored adage of mutex-based
+concurrency control "associate mutexes with data, not code".
+Examples are abundant and easy to find. For example:
+
+``` Cpp
+  class AdPublisherHandler : public AdPopulatorIf,
+                             public fb303::FacebookBase,
+                             public ZkBaseApplication {
+    ...
+    OnDemandUpdateIdMap adsToBeUpdated_;
+    ReadWriteMutex adsToBeUpdatedLock_;
+
+    OnDemandUpdateIdMap limitsToBeUpdated_;
+    ReadWriteMutex limitsToBeUpdatedLock_;
+
+    OnDemandUpdateIdMap campaignsToBeUpdated_;
+    ReadWriteMutex campaignsToBeUpdatedLock_;
+    ...
+  };
+```
+
+Whenever the code needs to read or write some of the protected
+data, it acquires the mutex for reading or for reading and
+writing. For example:
+
+``` Cpp
+  void AdPublisherHandler::requestUpdateAdId(const int64_t adId,
+                                             const int32_t dbId) {
+    checkDbHandlingStatus(dbId);
+    RWGuard g(adsToBeUpdatedLock_, RW_WRITE);
+    adsToBeUpdated_[dbId][adId] = 1;
+    adPublisherMonitor_->addStatValue("request_adId_update", 1, dbId);
+    LOG(INFO) << "received request to update ad id " << adId;
+  }
+```
+
+The pattern is an absolute classic and present everywhere.
+However, it is inefficient, makes incorrect code easy to
+write, is prone to deadlocking, and is bulkier than it could
+otherwise be. To expand:
+
+* In the code above, for example, the critical section is only
+  the line right after `RWGuard`'s definition; it is frivolous
+  that everything else (including a splurging `LOG(INFO)`) keeps
+  the lock acquired for no good reason. This is because the
+  locked regions are not visible; the guard's construction
+  introduces a critical section as long as the remainder of the
+  current scope.
+* The correctness of the technique is entirely predicated on
+  convention. There is no ostensible error for code that:
+
+  * manipulates a piece of data without acquiring its lock first
+  * acquires a different lock instead of the intended one
+  * acquires a lock in read mode but modifies the guarded data structure
+  * acquires a lock in read-write mode although it only has `const`
+    access to the guarded data
+  * acquires one lock when another lock is already held, which may
+    lead to deadlocks if another thread acquires locks in the
+    inverse order
+
+### Introduction to `folly/Synchronized.h`
+
+The same code sample could be rewritten with `Synchronized`
+as follows:
+
+``` Cpp
+  class AdPublisherHandler : public AdPopulatorIf,
+                             public fb303::FacebookBase,
+                             public ZkBaseApplication {
+    ...
+    Synchronized<OnDemandUpdateIdMap>
+      adsToBeUpdated_,
+      limitsToBeUpdated_,
+      campaignsToBeUpdated_;
+    ...
+  };
+
+  void AdPublisherHandler::requestUpdateAdId(const int64_t adId,
+                                             const int32_t dbId) {
+    checkDbHandlingStatus(dbId);
+    SYNCHRONIZED (adsToBeUpdated_) {
+      adsToBeUpdated_[dbId][adId] = 1;
+    }
+    adPublisherMonitor_->addStatValue("request_adId_update", 1, dbId);
+    LOG(INFO) << "received request to update ad id " << adId;
+  }
+```
+
+The rewrite does at maximum efficiency what needs to be done:
+acquires the lock associated with the `OnDemandUpdateIdMap`
+object, writes to the map, and releases the lock immediately
+thereafter.
+
+On the face of it, that's not much to write home about, and not
+an obvious improvement over the previous state of affairs. But
+the features at work invisible in the code above are as important
+as those that are visible:
+
+* Unlike before, the data and the mutex protecting it are
+  inextricably encapsulated together.
+* Critical sections are readily visible and emphasize code that
+  needs to do minimal work and be subject to extra scrutiny.
+* Dangerous nested `SYNCHRONIZED` statements are more visible
+  than sequenced declarations of guards at the same level. (This
+  is not foolproof because a method call issued inside a
+  `SYNCHRONIZED` scope may open its own `SYNCHRONIZED` block.) A
+  construct `SYNCHRONIZED_DUAL`, discussed later in this
+  document, allows locking two objects quasi-simultaneously in
+  the same order in all threads, thus avoiding deadlocks.
+* If you tried to use `adsToBeUpdated_` outside the
+  `SYNCHRONIZED` scope, you wouldn't be able to; it is virtually
+  impossible to tease out the map object without acquiring the
+  correct lock. However, inside the `SYNCHRONIZED` scope, the
+  *same* name serves as the actual underlying object of type
+  `OnDemandUpdateIdMap` (which is a map of maps).
+* Outside `SYNCHRONIZED`, if you just want to call one
+  method, you can do so by using `adsToBeUpdated_` as a
+  pointer like this:
+
+  `adsToBeUpdated_->clear();`
+
+This acquires the mutex, calls `clear()` against the underlying
+map object, and releases the mutex immediately thereafter.
+
+`Synchronized` offers several other methods, which are described
+in detail below.
+
+### Template class `Synchronized<T>`
+
+##### Constructors
+
+The default constructor default-initializes the data and its
+associated mutex.
+
+The copy constructor locks the source for reading and copies its
+data into the target. (The target is not locked as an object
+under construction is only accessed by one thread.)
+
+Finally, `Synchronized<T>` defines an explicit constructor that
+takes an object of type `T` and copies it. For example:
+
+``` Cpp
+  // Default constructed
+  Synchronized< map<string, int> > syncMap1;
+
+  // Copy constructed
+  Synchronized< map<string, int> > syncMap2(syncMap1);
+
+  // Initializing from an existing map
+  map<string, int> init;
+  init["world"] = 42;
+  Synchronized< map<string, int> > syncMap3(init);
+  EXPECT_EQ(syncMap3->size(), 1);
+```
+
+#### Assignment, swap, and copying
+
+The canonical assignment operator locks both objects involved and
+then copies the underlying data objects. The mutexes are not
+copied. The locks are acquired in increasing address order, so
+deadlock is avoided. For example, there is no problem if one
+thread assigns `a = b` and the other assigns `b = a` (other than
+that design probably deserving a Razzie award). Similarly, the
+`swap` method takes a reference to another `Synchronized<T>`
+object and swaps the data. Again, locks are acquired in a well-
+defined order. The mutexes are not swapped.
+
+An additional assignment operator accepts a `const T&` on the
+right-hand side. The operator copies the datum inside a
+critical section.
+
+An additional `swap` method accepts a `T&` and swaps the data
+inside a critical section. This is by far the preferred method of
+changing the guarded datum wholesale because it keeps the lock
+only for a short time, thus lowering the pressure on the mutex.
+
+To get a copy of the guarded data, there are two methods
+available: `void copy(T*)` and `T copy()`. The first copies data
+to a provided target and the second returns a copy by value. Both
+operations are done under a read lock. Example:
+
+``` Cpp
+  Synchronized< fbvector<fbstring> > syncVec1, syncVec2;
+  fbvector<fbstring> vec;
+
+  // Assign
+  syncVec1 = syncVec2;
+  // Assign straight from vector
+  syncVec1 = vec;
+
+  // Swap
+  syncVec1.swap(syncVec2);
+  // Swap with vector
+  syncVec1.swap(vec);
+
+  // Copy to given target
+  syncVec1.copy(&vec);
+  // Get a copy by value
+  auto copy = syncVec1.copy();
+```
+
+#### `LockedPtr operator->()` and `ConstLockedPtr operator->() const`
+
+We've already seen `operator->` at work. Essentially calling a
+method `obj->foo(x, y, z)` calls the method `foo(x, y, z)` inside
+a critical section as long-lived as the call itself. For example:
+
+``` Cpp
+  void fun(Synchronized< fbvector<fbstring> > & vec) {
+    vec->push_back("hello");
+    vec->push_back("world");
+  }
+```
+
+The code above appends two elements to `vec`, but the elements
+won't appear necessarily one after another. This is because in
+between the two calls the mutex is released, and another thread
+may modify the vector. At the cost of anticipating a little, if
+you want to make sure you insert "world" right after "hello", you
+should do this:
+
+``` Cpp
+  void fun(Synchronized< fbvector<fbstring> > & vec) {
+    SYNCHRONIZED (vec) {
+      vec.push_back("hello");
+      vec.push_back("world");
+    }
+  }
+```
+
+This brings us to a cautionary discussion. The way `operator->`
+works is rather ingenious with creating an unnamed temporary that
+enforces locking and all, but it's not a panacea. Between two
+uses of `operator->`, other threads may change the synchronized
+object in arbitrary ways, so you shouldn't assume any sort of
+sequential consistency. For example, the innocent-looking code
+below may be patently wrong:
+
+``` Cpp
+  void fun(Synchronized< fbvector<fbstring> > & vec) {
+    if (!vec->empty()) {
+      auto elem = vec->back();
+      vec->pop_back();
+    }
+  }
+```
+
+If another thread clears the vector in between the call to
+`empty` and the call to `pop_back`, this code ends up attempting
+to extract an element from an empty vector. Needless to say,
+iteration a la:
+
+``` Cpp
+  // No. NO. NO!
+  FOR_EACH_RANGE (i, vec->begin(), vec->end()) {
+    ...
+  }
+```
+
+is a crime punishable by long debugging nights.
+
+If the `Synchronized<T>` object involved is `const`-qualified,
+then you'll only be able to call `const` methods through
+`operator->`. So, for example, `vec->push_back("xyz")` won't work
+if `vec` were `const`-qualified. The locking mechanism
+capitalizes on the assumption that `const` methods don't modify
+their underlying data and only acquires a read lock (as opposed
+to a read and write lock), which is cheaper but works only if the
+immutability assumption holds. Note that this is strictly not the
+case because `const`-ness can always be undone via `mutable`
+members, casts, and surreptitious access to shared data. Our code
+is seldom guilty of such, and we also assume the STL uses no
+shenanigans. But be warned.
+
+#### `asConst()`
+
+Consider:
+
+``` Cpp
+  void fun(Synchronized<vector<fbstring>> & vec) {
+    if (vec->size() > 1000000) {
+      LOG(WARNING) << "The blinkenlights are overloaded.";
+    }
+    vec->push_back("another blinkenlight");
+  }
+```
+
+This code is correct (at least according to a trivial intent),
+but less efficient than it could otherwise be. This is because
+the call `vec->size()` acquires a full read-write lock, but only
+needs a read lock. We need to help the type system here by
+telling it "even though `vec` is a mutable object, consider it a
+constant for this call". This should be easy enough because
+conversion to const is trivial - just issue
+`const_cast<const Synchronized<vector<fbstring>>&>(vec)`. Ouch. To
+make that operation simpler - a lot simpler - `Synchronized`
+defines the method `asConst()`, which is a glorious one-liner.
+With `asConst`
With `asConst` +in tow, it's very easy to achieve what we wanted: + +``` Cpp + void fun(Synchronized> & vec) { + if (vec.asConst()->size() > 1000000) { + LOG(WARNING) << "The blinkenlights are overloaded."; + } + vec->push_back("another blinkenlight"); + } +``` + +QED (Quite Easy Done). This concludes the documentation for +`Synchronized`. + +### `SYNCHRONIZED` + +The `SYNCHRONIZED` macro introduces a pseudo-statement that adds +a whole new level of usability to `Synchronized`. As +discussed, `operator->` can only lock over the duration of a +call, so it is insufficient for complex operations. With +`SYNCHRONIZED` you get to lock the object in a scoped manner (not +unlike Java's `synchronized` statement) and to directly access +the object inside that scope. + +`SYNCHRONIZED` has two forms. We've seen the first one a couple +of times already: + +``` Cpp + void fun(Synchronized> & vec) { + SYNCHRONIZED (vec) { + vec.push_back(42); + CHECK(vec.back() == 42); + ... + } + } +``` + +The scope introduced by `SYNCHRONIZED` is a critical section +guarded by `vec`'s mutex. In addition to doing that, +`SYNCHRONIZED` also does an interesting sleight of hand: it binds +the name `vec` inside the scope to the underlying `fbvector` +object - as opposed to `vec`'s normal type, which is +`Synchronized>`. This fits very nice the "form +follow function" - inside the critical section you have earned +access to the actual data, and the name bindings reflect that as +well. `SYNCHRONIZED(xyz)` essentially cracks `xyz` temporarily +and gives you access to its innards. + +Now, what if `fun` wants to take a pointer to +`Synchronized>` - let's call it `pvec`? Generally, +what if we want to synchronize on an expression as opposed to a +symbolic variable? In that case `SYNCHRONIZED(*pvec)` would not +work because "`*pvec`" is not a name. That's where the second +form of `SYNCHRONIZED` kicks in: + +``` Cpp + void fun(Synchronized> * pvec) { + SYNCHRONIZED (vec, *pvec) { + vec.push_back(42); + CHECK(vec.back() == 42); + ... + } + } +``` + +Ha, so now we pass two arguments to `SYNCHRONIZED`. The first +argument is the name bound to the data, and the second argument +is the expression referring to the `Synchronized` object. So +all cases are covered. + +### `SYNCHRONIZED_CONST` + +Recall from the discussion about `asConst()` that we +sometimes want to voluntarily restrict access to an otherwise +mutable object. The `SYNCHRONIZED_CONST` pseudo-statement +makes that intent easily realizable and visible to +maintainers. For example: + +``` Cpp + void fun(Synchronized> & vec) { + fbvector local; + SYNCHRONIZED_CONST (vec) { + CHECK(vec.size() > 42); + local = vec; + } + local.resize(42000); + SYNCHRONIZED (vec) { + local.swap(vec); + } + } +``` + +Inside a `SYNCHRONIZED_CONST(xyz)` scope, `xyz` is bound to a `const`- +qualified datum. The corresponding lock is a read lock. + +`SYNCHRONIZED_CONST` also has a two-arguments version, just like +`SYNCHRONIZED`. In fact, `SYNCHRONIZED_CONST(a)` simply expands +to `SYNCHRONIZED(a, a.asConst())` and `SYNCHRONIZED_CONST(a, b)` +expands to `SYNCHRONIZED(a, (b).asConst())`. The type system and +`SYNCHRONIZED` take care of the rest. + +### `TIMED_SYNCHRONIZED` and `TIMED_SYNCHRONIZED_CONST` + +These pseudo-statements allow you to acquire the mutex with a +timeout. 
+
+``` Cpp
+  void fun(Synchronized<fbvector<int>> & vec) {
+    TIMED_SYNCHRONIZED (10, vec) {
+      if (vec) {
+        vec->push_back(42);
+        CHECK(vec->back() == 42);
+      } else {
+        LOG(INFO) << "Dognabbit, I've been waiting over here for 10 milliseconds and couldn't get through!";
+      }
+    }
+  }
+```
+
+If the mutex acquisition was successful within a number of
+milliseconds dictated by its first argument, `TIMED_SYNCHRONIZED`
+binds its second argument to a pointer to the protected object.
+Otherwise, the pointer will be `NULL`. (Contrast that with
+`SYNCHRONIZED`, which always succeeds, so it binds the protected
+object to a reference.) Inside the `TIMED_SYNCHRONIZED` statement
+you must, of course, check that the pointer is not null to make
+sure the operation didn't time out.
+
+`TIMED_SYNCHRONIZED` takes two or three parameters. The first is
+always the timeout, and the remaining one or two are just like
+the parameters of `SYNCHRONIZED`.
+
+Issuing `TIMED_SYNCHRONIZED` with a zero timeout is an
+opportunistic attempt to acquire the mutex.
+
+### `UNSYNCHRONIZED`
+
+`SYNCHRONIZED` is a good mechanism for enforcing scoped
+synchronization, but it has the inherent limitation that it
+requires the critical section to be, well, scoped. Sometimes the
+code structure requires a fleeting "escape" from the iron fist of
+synchronization. Clearly, simple cases are handled with sequenced
+`SYNCHRONIZED` scopes:
+
+``` Cpp
+  Synchronized<map<int, string>> dic;
+  ...
+  SYNCHRONIZED (dic) {
+    if (dic.find(0) != dic.end()) {
+      return;
+    }
+  }
+  LOG(INFO) << "Key 0 not found, inserting it.";
+  SYNCHRONIZED (dic) {
+    dic[0] = "zero";
+  }
+```
+
+For more complex, nested flow control, you may want to use the
+`UNSYNCHRONIZED` macro. It (only) works inside a `SYNCHRONIZED`
+pseudo-statement and temporarily unlocks the mutex:
+
+``` Cpp
+  Synchronized<map<int, string>> dic;
+  ...
+  SYNCHRONIZED (dic) {
+    auto i = dic.find(0);
+    if (i == dic.end()) {
+      UNSYNCHRONIZED (dic) {
+        LOG(INFO) << "Key 0 not found, inserting it.";
+      }
+      dic[0] = "zero";
+    } else {
+      i->second = "zero";
+    }
+  }
+```
+
+Clearly `UNSYNCHRONIZED` comes with specific caveats and
+liabilities. You must assume that during the `UNSYNCHRONIZED`
+section, other threads might have changed the protected structure
+in arbitrary ways. In the example above, you cannot use the
+iterator `i` and you cannot assume that the key `0` is not in the
+map; another thread might have inserted it while you were
+bragging on `LOG(INFO)`.
+
+### `SYNCHRONIZED_DUAL`
+
+Sometimes locking just one object won't be able to cut the mustard. Consider a
+function that needs to lock two `Synchronized` objects at the
+same time - for example, to copy some data from one to the other.
+At first sight, it looks like nested `SYNCHRONIZED` statements
+will work just fine:
+
+``` Cpp
+  void fun(Synchronized<fbvector<int>> & a, Synchronized<fbvector<int>> & b) {
+    SYNCHRONIZED (a) {
+      SYNCHRONIZED (b) {
+        ... use a and b ...
+      }
+    }
+  }
+```
+
+This code compiles and may even run most of the time, but embeds
+a deadly peril: if one thread calls `fun(x, y)` and another
+thread calls `fun(y, x)`, then the two threads are liable to
+deadlocking as each thread will be waiting for a lock the other
+is holding. This issue is a classic that applies regardless of
+the fact the objects involved have the same type.
+
+This classic problem has a classic solution: all threads must
+acquire locks in the same order.
+The actual order is not
+important, just the fact that the order is the same in all
+threads. Many libraries simply acquire mutexes in increasing
+order of their address, which is what we'll do, too. The pseudo-
+statement `SYNCHRONIZED_DUAL` takes care of all details of proper
+locking of two objects and offering their innards:
+
+``` Cpp
+  void fun(Synchronized<fbvector<int>> & a, Synchronized<fbvector<int>> & b) {
+    SYNCHRONIZED_DUAL (myA, a, myB, b) {
+      ... use myA and myB ...
+    }
+  }
+```
+
+To avoid potential confusions, `SYNCHRONIZED_DUAL` only defines a
+four-argument version. The code above locks `a` and `b` in
+increasing order of their address and offers their data under the
+names `myA` and `myB`, respectively.
+
+### Synchronizing several data items with one mutex
+
+The library is geared at protecting one object of a given type
+with a mutex. However, sometimes we'd like to protect two or more
+members with the same mutex. Consider for example a bidirectional
+map, i.e. a map that holds an `int` to `string` mapping and also
+the converse `string` to `int` mapping. The two maps would need
+to be manipulated simultaneously. There are at least two designs
+that come to mind.
+
+#### Using a nested `struct`
+
+You can easily pack the needed data items in a little struct.
+For example:
+
+``` Cpp
+  class Server {
+    struct BiMap {
+      map<int, string> direct;
+      map<string, int> inverse;
+    };
+    Synchronized<BiMap> bimap_;
+    ...
+  };
+  ...
+  SYNCHRONIZED (bimap_) {
+    bimap_.direct[0] = "zero";
+    bimap_.inverse["zero"] = 0;
+  }
+```
+
+With this code in tow you get to use `bimap_` just like any other
+`Synchronized` object, without much effort.
+
+#### Using `std::tuple`
+
+If you won't stop short of using a spaceship-era approach,
+`std::tuple` is there for you. The example above could be
+rewritten for the same functionality like this:
+
+``` Cpp
+  class Server {
+    Synchronized<tuple<map<int, string>, map<string, int>>> bimap_;
+    ...
+  };
+  ...
+  SYNCHRONIZED (bimap_) {
+    get<0>(bimap_)[0] = "zero";
+    get<1>(bimap_)["zero"] = 0;
+  }
+```
+
+The code uses `std::get` with compile-time integers to access the
+fields in the tuple. The relative advantages and disadvantages of
+using a local struct vs. `std::tuple` are quite obvious - in the
+first case you need to invest in the definition, in the second
+case you need to put up with slightly more verbose and less clear
+access syntax.
+
+### Summary
+
+`Synchronized` and its supporting tools offer you a simple,
+robust paradigm for mutual exclusion-based concurrency. Instead
+of manually pairing data with the mutexes that protect it and
+relying on convention to use them appropriately, you can benefit
+from encapsulation and typechecking to offload a large part of
+that task and to provide good guarantees.
diff --git a/folly/docs/ThreadCachedInt.md b/folly/docs/ThreadCachedInt.md
new file mode 100644
index 00000000..c4f780ec
--- /dev/null
+++ b/folly/docs/ThreadCachedInt.md
@@ -0,0 +1,98 @@
+`folly/ThreadCachedInt.h`
+----------------------
+
+High-performance atomic increment using thread caching.
+
+`folly/ThreadCachedInt.h` introduces an integer class designed for high
+performance increments from multiple threads simultaneously without
+loss of precision. It has two read modes: `readFast`, which gives a
+potentially stale value with one load, and `readFull`, which gives the
+exact value but is much slower, as discussed below.
+
+
+### Performance
+***
+
+Increment performance is up to 10x greater than `std::atomic_fetch_add` in high
+contention environments. See `folly/test/ThreadCachedIntTest.cpp` for more
+comprehensive benchmarks.
+
+`readFast` is as fast as a single load.
+
+`readFull`, on the other hand, requires acquiring a mutex and iterating through
+a list to accumulate the values of all the thread local counters, so is
+significantly slower than `readFast`.
+
+
+### Usage
+***
+
+Create an instance and increment it with `increment` or the operator overloads.
+Read the value with `readFast` for quick, potentially stale data, or `readFull`
+for a more expensive but precise result. There are additional convenience
+functions as well, such as `set`.
+
+``` Cpp
+  ThreadCachedInt<int64_t> val;
+  EXPECT_EQ(0, val.readFast());
+  ++val;                        // increment in thread local counter only
+  EXPECT_EQ(0, val.readFast()); // increment has not been flushed
+  EXPECT_EQ(1, val.readFull()); // accumulates all thread local counters
+  val.set(2);
+  EXPECT_EQ(2, val.readFast());
+  EXPECT_EQ(2, val.readFull());
+```
+
+### Implementation
+***
+
+`folly::ThreadCachedInt` uses `folly::ThreadLocal` to store thread specific
+objects that each have a local counter. When incrementing, the thread local
+instance is incremented. If the local counter passes the cache size, the value
+is flushed to the global counter with an atomic increment. It is this global
+counter that is read with `readFast` via a simple load, but will not count any
+of the updates that haven't been flushed.
+
+In order to read the exact value, `ThreadCachedInt` uses the extended
+`readAllThreads()` API of `folly::ThreadLocal` to iterate through all the
+references to all the associated thread local object instances. This currently
+requires acquiring a global mutex and iterating through the references,
+accumulating the counters along with the global counter. This also means that
+the first use of the object from a new thread will acquire the mutex in order to
+insert the thread local reference into the list. By default, there is one
+global mutex per integer type used in `ThreadCachedInt`. If you plan on using a
+lot of `ThreadCachedInt`s in your application, consider breaking up the
+global mutex by introducing additional `Tag` template parameters.
+
+`set` simply sets the global counter value, and marks all the thread local
+instances as needing to be reset. When iterating with `readFull`, thread local
+counters that have been marked as reset are skipped. When incrementing, thread
+local counters marked for reset are set to zero and unmarked for reset.
+
+Upon destruction, thread local counters are flushed to the parent so that counts
+are not lost after increments in temporary threads. This requires grabbing the
+global mutex to make sure the parent itself wasn't destroyed in another thread
+already.
+
+### Alternate Implementations
+***
+
+There are of course many ways to skin a cat, and you may notice there is a
+partial alternate implementation in `folly/test/ThreadCachedIntTest.cpp` that
+provides similar performance. `ShardedAtomicInt` simply uses an array of
+`std::atomic<int64_t>`'s and hashes threads across them to do low-contention
+atomic increments, and `readFull` just sums up all the ints.
+
+This sounds great, but in order to get the contention low enough to get similar
+performance as `ThreadCachedInt` with 24 threads, `ShardedAtomicInt` needs about
+2000 ints to hash across. This uses about 20x more memory, and the lock-free
+`readFull` has to sum up all 2048 ints, which ends up being about 50x slower
+than `ThreadCachedInt` in low contention situations, which is hopefully the
+common case since it's designed for high-write, low-read access patterns.
+Performance of `readFull` is about the same as `ThreadCachedInt`'s in high
+contention environments.
+
+Depending on the operating conditions, it may make more sense to use one
+implementation over the other. For example, a lower contention environment will
+probably be able to use a `ShardedAtomicInt` with a much smaller array without
+hurting performance, while improving memory consumption and the performance of
+`readFull`.
diff --git a/folly/docs/ThreadLocal.md b/folly/docs/ThreadLocal.md
new file mode 100644
index 00000000..1c185fba
--- /dev/null
+++ b/folly/docs/ThreadLocal.md
@@ -0,0 +1,106 @@
+`folly/ThreadLocal.h`
+----------------------
+
+Improved thread local storage for non-trivial types.
+
+ * ~4x faster than `boost::thread_specific_ptr`.
+ * Similar speed to using `pthread_getspecific` directly, but only consumes a
+   single `pthread_key_t` per `Tag` template param.
+ * Expands on the `thread_specific_ptr` API with `accessAllThreads` and extended
+   custom deleter support.
+
+
+### Usage
+***
+
+The API of `ThreadLocalPtr` is very close to `boost::thread_specific_ptr` with
+the notable addition of the `accessAllThreads` method. There is also a
+`ThreadLocal` class which is a thin wrapper around `ThreadLocalPtr` that manages
+allocation automatically (creates a new object the first time it is dereferenced
+from each thread).
+
+`ThreadLocalPtr` simply gives you a place to put and access a pointer local to
+each thread such that it will be destroyed appropriately.
+
+```Cpp
+{
+  folly::ThreadLocalPtr<Widget> w;
+  w.reset(new Widget(0), Widget::customDeleterA);
+  std::thread([&w]() {
+    w.reset(new Widget(1), Widget::customDeleterB);
+    w.get()->mangleWidget();
+  }).join(); // Widget(1) is destroyed with customDeleterB
+} // Widget(0) is destroyed with customDeleterA
+```
+
+Note that `customDeleterB` will get called with
+`TLPDestructionMode::THIS_THREAD` and `customDeleterA` will get called with
+`TLPDestructionMode::ALL_THREADS`. This is to distinguish between thread exit
+vs. the entire `ThreadLocalPtr` getting destroyed, in which case there is
+cleanup work that may be avoided.
+
+The `accessAllThreads` interface is provided to walk all the thread local child
+objects of a parent. `accessAllThreads` initializes an accessor
+which holds a global lock that blocks all creation and destruction of
+`ThreadLocal` objects with the same `Tag` and can be used as an iterable
+container. Typical use is for frequent write, infrequent read data access
+patterns such as counters. Note that you must specify a unique `Tag` type so
+you don't block other `ThreadLocal` object usage, and you should try to
+minimize the lifetime of the accessor so the lock is held for as short a time
+as possible.
+
+The following example is a simplification of `folly/ThreadCachedInt.h`. It
+keeps track of a counter value and allows multiple threads to add to the count
+without synchronization. In order to get the total count, `read()` iterates
+through all the thread local values via `accessAllThreads()` and sums them up.
+`class NewTag` is used to break the global mutex so that this class won't block
+other `ThreadLocal` usage when `read()` is called.
+
+Note that `read()` holds the global mutex which blocks construction,
+destruction, and `read()` for other `SimpleThreadCachedInt`'s, but does not
+block `add()`. Also, since it uses the unique `NewTag`, `SimpleThreadCachedInt`
+does not affect other `ThreadLocal` usage.
+
+```Cpp
+class SimpleThreadCachedInt {
+  class NewTag;  // Segments the global mutex
+  ThreadLocal<int, NewTag> val_;
+
+ public:
+  void add(int val) {
+    *val_ += val;  // operator*() gives a reference to the thread local instance
+  }
+
+  int read() {
+    int ret = 0;
+    // accessAllThreads acquires the global lock
+    for (const auto& i : val_.accessAllThreads()) {
+      ret += i;
+    }  // Global lock is released on scope exit
+    return ret;
+  }
+};
+```
+
+
+### Implementation
+***
+
+We keep a `__thread` array of pointers to objects (`ThreadEntry::elements`)
+where each array has an index for each unique instance of the `ThreadLocalPtr`
+object. Each `ThreadLocalPtr` object has a unique id that is an index into
+these arrays so we can fetch the correct object from thread local storage
+very efficiently.
+
+In order to prevent unbounded growth of the id space and thus huge
+`ThreadEntry::elements` arrays, for example due to continuous creation and
+destruction of `ThreadLocalPtr` objects, we keep track of all active instances
+by linking them together into a list. When an instance is destroyed we remove
+it from the chain and insert the id into `freeIds_` for reuse. These operations
+require a global mutex, but only happen at construction and destruction time.
+`accessAllThreads` also acquires this global mutex.
+
+We use a single global `pthread_key_t` per `Tag` to manage object destruction
+and memory cleanup upon thread exit because there is a finite number of
+`pthread_key_t`'s available per process.
+
diff --git a/folly/docs/small_vector.md b/folly/docs/small_vector.md
new file mode 100644
index 00000000..a9a2797e
--- /dev/null
+++ b/folly/docs/small_vector.md
@@ -0,0 +1,69 @@
+`folly/small_vector.h`
+----------------------
+
+`folly::small_vector` is a sequence container that
+implements the small buffer optimization. It behaves similarly to
+`std::vector`, except that until a certain number of elements are
+reserved it does not use the heap.
+
+Like standard vector, it is guaranteed to use contiguous memory. (So,
+after it spills to the heap all the elements live in the heap buffer.)
+
+Simple usage example:
+
+``` Cpp
+  small_vector<int, 2> vec;
+  vec.push_back(0); // Stored in-place on stack
+  vec.push_back(1); // Still on the stack
+  vec.push_back(2); // Switches to heap buffer.
+```
+
+### Details
+***
+
+This class is useful in either of following cases:
+
+* Short-lived stack vectors with few elements (or maybe with a
+  usually-known number of elements), if you want to avoid malloc.
+
+* If the vector(s) are usually under a known size and lookups are very
+  common, you'll save an extra cache miss in the common case when the
+  data is kept in-place.
+
+* You have billions of these vectors and don't want to waste space on
+  `std::vector`'s capacity tracking. This vector lets `malloc` track our
+  allocation capacity. (Note that this slows down the
+  insertion/reallocation code paths significantly; if you need those
+  to be fast you should use `fbvector`.)
+
+The last two cases were the main motivation for implementing it.
+
+There are also a couple of flags you can pass into this class
+template to customize its behavior. You can provide them in any
+order after the in-place count. They are all in the namespace
+`small_vector_policy`.
+
+* `NoHeap` - Avoid the heap entirely. (Throws `std::length_error` if
+  you would've spilled out of the in-place allocation.)
+
+* `OneBitMutex` - On x64 platforms, this spends one bit of the
+  `size_type` to provide a spin lock that you can use for whatever you
+  want.
+
+A couple more examples:
+
+``` Cpp
+  // With space for 32 in situ unique pointers, and only using a
+  // 4-byte size_type.
+  small_vector<std::unique_ptr<int>,32,uint32_t> v;
+
+  // An inline vector of up to 256 ints which will not use the
+  // heap and comes with a spin lock.
+  small_vector<int,256,NoHeap,OneBitMutex> v;
+
+  // Same as the above, but making the size_type smaller too.
+  small_vector<int,256,NoHeap,OneBitMutex,uint8_t> v;
+```
diff --git a/folly/docs/style.css b/folly/docs/style.css
new file mode 100644
index 00000000..67ac7e7a
--- /dev/null
+++ b/folly/docs/style.css
@@ -0,0 +1,7 @@
diff --git a/folly/dynamic-inl.h b/folly/dynamic-inl.h
new file mode 100644
index 00000000..4a7c3c54
--- /dev/null
+++ b/folly/dynamic-inl.h
@@ -0,0 +1,850 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_DYNAMIC_INL_H_
+#define FOLLY_DYNAMIC_INL_H_
+
+#include <functional>
+#include <boost/iterator/iterator_adaptor.hpp>
+#include <boost/iterator/iterator_facade.hpp>
+#include "folly/Likely.h"
+#include "folly/Conv.h"
+#include "folly/Format.h"
+
+//////////////////////////////////////////////////////////////////////
+
+namespace std {
+
+template<>
+struct hash< ::folly::dynamic> {
+  size_t operator()(::folly::dynamic const& d) const {
+    return d.hash();
+  }
+};
+
+}
+
+//////////////////////////////////////////////////////////////////////
+
+// This is a higher-order preprocessor macro to aid going from runtime
+// types to the compile time type system.
+#define FB_DYNAMIC_APPLY(type, apply) do { \
+  switch ((type)) { \
+  case NULLT: apply(void*); break; \
+  case ARRAY: apply(Array); break; \
+  case BOOL: apply(bool); break; \
+  case DOUBLE: apply(double); break; \
+  case INT64: apply(int64_t); break; \
+  case OBJECT: apply(ObjectImpl); break; \
+  case STRING: apply(fbstring); break; \
+  default: CHECK(0); abort(); \
+  } \
+} while (0)
+
+//////////////////////////////////////////////////////////////////////
+
+namespace folly {
+
+//////////////////////////////////////////////////////////////////////
+
+namespace detail {
+
+  // This helper is used in destroy() to be able to run destructors on
+  // types like "int64_t" without a compiler error.
+  struct Destroy {
+    template<class T> static void destroy(T* t) { t->~T(); }
+  };
+
+  /*
+   * The enable_if junk here is necessary to avoid ambiguous
+   * conversions relating to bool and double when you implicitly
+   * convert an int or long to a dynamic.
+   */
+  template<class T, class Enable = void> struct ConversionHelper;
+  template<class T>
+  struct ConversionHelper<
+    T,
+    typename std::enable_if<
+      std::is_integral<T>::value && !std::is_same<T,bool>::value
+    >::type
+  > {
+    typedef int64_t type;
+  };
+  template<class T>
+  struct ConversionHelper<
+    T,
+    typename std::enable_if<
+      (!std::is_integral<T>::value || std::is_same<T,bool>::value) &&
+      !std::is_same<T,std::nullptr_t>::value
+    >::type
+  > {
+    typedef T type;
+  };
+  template<class T>
+  struct ConversionHelper<
+    T,
+    typename std::enable_if<
+      std::is_same<T,std::nullptr_t>::value
+    >::type
+  > {
+    typedef void* type;
+  };
+
+  /*
+   * Helper for implementing numeric conversions in operators on
+   * numbers.
Just promotes to double when one of the arguments is + * double, or throws if either is not a numeric type. + */ + template class Op> + dynamic numericOp(dynamic const& a, dynamic const& b) { + if (!a.isNumber() || !b.isNumber()) { + throw TypeError("numeric", a.type(), b.type()); + } + if (a.type() != b.type()) { + auto& integ = a.isInt() ? a : b; + auto& nonint = a.isInt() ? b : a; + return Op()(to(integ.asInt()), nonint.asDouble()); + } + if (a.isDouble()) { + return Op()(a.asDouble(), b.asDouble()); + } + return Op()(a.asInt(), b.asInt()); + } + +} + +////////////////////////////////////////////////////////////////////// + +struct TypeError : std::runtime_error { + explicit TypeError(const std::string& expected, dynamic::Type actual) + : std::runtime_error(to("TypeError: expected dynamic " + "type `", expected, '\'', ", but had type `", + dynamic::typeName(actual), '\'')) + {} + explicit TypeError(const std::string& expected, + dynamic::Type actual1, dynamic::Type actual2) + : std::runtime_error(to("TypeError: expected dynamic " + "types `", expected, '\'', ", but had types `", + dynamic::typeName(actual1), "' and `", dynamic::typeName(actual2), + '\'')) + {} +}; + +/* + * We're doing this instead of a simple member typedef to avoid the + * undefined behavior of parameterizing std::unordered_map<> with an + * incomplete type. + * + * Note: Later we may add separate order tracking here (a multi-index + * type of thing.) + */ +struct dynamic::ObjectImpl : std::unordered_map {}; + +////////////////////////////////////////////////////////////////////// + +// Helper object for creating objects conveniently. See object and +// the dynamic::dynamic(ObjectMaker&&) ctor. +struct dynamic::ObjectMaker { + friend struct dynamic; + + explicit ObjectMaker() : val_(dynamic::object) {} + explicit ObjectMaker(dynamic const& key, dynamic val) + : val_(dynamic::object) + { + val_.insert(key, std::move(val)); + } + explicit ObjectMaker(dynamic&& key, dynamic val) + : val_(dynamic::object) + { + val_.insert(std::move(key), std::move(val)); + } + + // Make sure no one tries to save one of these into an lvalue with + // auto or anything like that. + ObjectMaker(ObjectMaker&&) = default; + ObjectMaker(ObjectMaker const&) = delete; + ObjectMaker& operator=(ObjectMaker const&) = delete; + ObjectMaker& operator=(ObjectMaker&&) = delete; + + // These return rvalue-references instead of lvalue-references to allow + // constructs like this to moved instead of copied: + // dynamic a = dynamic::object("a", "b")("c", "d") + ObjectMaker&& operator()(dynamic const& key, dynamic val) { + val_.insert(key, std::move(val)); + return std::move(*this); + } + + ObjectMaker&& operator()(dynamic&& key, dynamic val) { + val_.insert(std::move(key), std::move(val)); + return std::move(*this); + } + +private: + dynamic val_; +}; + +template +inline dynamic::ObjectMaker dynamic::object(Args&&... 
args) { + return dynamic::ObjectMaker(std::forward(args)...); +} + +////////////////////////////////////////////////////////////////////// + +struct dynamic::const_item_iterator + : boost::iterator_adaptor { + /* implicit */ const_item_iterator(base_type b) : iterator_adaptor_(b) { } + + private: + friend class boost::iterator_core_access; +}; + +struct dynamic::const_key_iterator + : boost::iterator_adaptor { + /* implicit */ const_key_iterator(base_type b) : iterator_adaptor_(b) { } + + private: + dynamic const& dereference() const { + return base_reference()->first; + } + friend class boost::iterator_core_access; +}; + +struct dynamic::const_value_iterator + : boost::iterator_adaptor { + /* implicit */ const_value_iterator(base_type b) : iterator_adaptor_(b) { } + + private: + dynamic const& dereference() const { + return base_reference()->second; + } + friend class boost::iterator_core_access; +}; + +////////////////////////////////////////////////////////////////////// + +inline dynamic::dynamic(ObjectMaker (*)()) + : type_(OBJECT) +{ + new (getAddress()) ObjectImpl(); +} + +inline dynamic::dynamic(char const* s) + : type_(STRING) +{ + new (&u_.string) fbstring(s); +} + +inline dynamic::dynamic(std::string const& s) + : type_(STRING) +{ + new (&u_.string) fbstring(s); +} + +inline dynamic::dynamic(std::initializer_list il) + : type_(ARRAY) +{ + new (&u_.array) Array(il.begin(), il.end()); +} + +inline dynamic::dynamic(ObjectMaker&& maker) + : type_(OBJECT) +{ + new (getAddress()) + ObjectImpl(std::move(*maker.val_.getAddress())); +} + +inline dynamic::dynamic(dynamic const& o) + : type_(NULLT) +{ + *this = o; +} + +inline dynamic::dynamic(dynamic&& o) + : type_(NULLT) +{ + *this = std::move(o); +} + +inline dynamic::~dynamic() { destroy(); } + +template +dynamic::dynamic(T t) { + typedef typename detail::ConversionHelper::type U; + type_ = TypeInfo::type; + new (getAddress()) U(std::move(t)); +} + +template +dynamic::dynamic(Iterator first, Iterator last) + : type_(ARRAY) +{ + new (&u_.array) Array(first, last); +} + +////////////////////////////////////////////////////////////////////// + +inline dynamic::const_iterator dynamic::begin() const { + return get().begin(); +} +inline dynamic::const_iterator dynamic::end() const { + return get().end(); +} + +template +struct dynamic::IterableProxy { + typedef It const_iterator; + + /* implicit */ IterableProxy(const dynamic::ObjectImpl* o) : o_(o) { } + + It begin() const { + return o_->begin(); + } + + It end() const { + return o_->end(); + } + + private: + const dynamic::ObjectImpl* o_; +}; + +inline dynamic::IterableProxy dynamic::keys() + const { + return &(get()); +} + +inline dynamic::IterableProxy dynamic::values() + const { + return &(get()); +} + +inline dynamic::IterableProxy dynamic::items() + const { + return &(get()); +} + +inline bool dynamic::isString() const { return get_nothrow(); } +inline bool dynamic::isObject() const { return get_nothrow(); } +inline bool dynamic::isBool() const { return get_nothrow(); } +inline bool dynamic::isArray() const { return get_nothrow(); } +inline bool dynamic::isDouble() const { return get_nothrow(); } +inline bool dynamic::isInt() const { return get_nothrow(); } +inline bool dynamic::isNull() const { return get_nothrow(); } +inline bool dynamic::isNumber() const { return isInt() || isDouble(); } + +inline dynamic::Type dynamic::type() const { + return type_; +} + +inline fbstring dynamic::asString() const { return asImpl(); } +inline double dynamic::asDouble() const { return asImpl(); } 
+inline int64_t dynamic::asInt() const { return asImpl(); } +inline bool dynamic::asBool() const { return asImpl(); } + +template +struct dynamic::CompareOp { + static bool comp(T const& a, T const& b) { return a < b; } +}; +template<> +struct dynamic::CompareOp { + static bool comp(ObjectImpl const& a, ObjectImpl const& b) { + // This code never executes; it is just here for the compiler. + return false; + } +}; + +inline bool dynamic::operator<(dynamic const& o) const { + if (UNLIKELY(type_ == OBJECT || o.type_ == OBJECT)) { + throw TypeError("object", type_); + } + if (type_ != o.type_) { + return type_ < o.type_; + } + +#define FB_X(T) return CompareOp::comp(*getAddress(), \ + *o.getAddress()) + FB_DYNAMIC_APPLY(type_, FB_X); +#undef FB_X +} + +inline bool dynamic::operator==(dynamic const& o) const { + if (type() != o.type()) { + if (isNumber() && o.isNumber()) { + auto& integ = isInt() ? *this : o; + auto& doubl = isInt() ? o : *this; + return integ.asInt() == doubl.asDouble(); + } + return false; + } + +#define FB_X(T) return *getAddress() == *o.getAddress(); + FB_DYNAMIC_APPLY(type_, FB_X); +#undef FB_X +} + +inline dynamic& dynamic::operator+=(dynamic const& o) { + if (type() == STRING && o.type() == STRING) { + *getAddress() += *o.getAddress(); + return *this; + } + *this = detail::numericOp(*this, o); + return *this; +} + +inline dynamic& dynamic::operator-=(dynamic const& o) { + *this = detail::numericOp(*this, o); + return *this; +} + +inline dynamic& dynamic::operator*=(dynamic const& o) { + *this = detail::numericOp(*this, o); + return *this; +} + +inline dynamic& dynamic::operator/=(dynamic const& o) { + *this = detail::numericOp(*this, o); + return *this; +} + +#define FB_DYNAMIC_INTEGER_OP(op) \ + inline dynamic& dynamic::operator op(dynamic const& o) { \ + if (!isInt() || !o.isInt()) { \ + throw TypeError("int64", type(), o.type()); \ + } \ + *getAddress() op o.asInt(); \ + return *this; \ + } + +FB_DYNAMIC_INTEGER_OP(%=) +FB_DYNAMIC_INTEGER_OP(|=) +FB_DYNAMIC_INTEGER_OP(&=) +FB_DYNAMIC_INTEGER_OP(^=) + +#undef FB_DYNAMIC_INTEGER_OP + +inline dynamic& dynamic::operator++() { + ++get(); + return *this; +} + +inline dynamic& dynamic::operator--() { + --get(); + return *this; +} + +inline dynamic& dynamic::operator=(dynamic const& o) { + if (&o != this) { + destroy(); +#define FB_X(T) new (getAddress()) T(*o.getAddress()) + FB_DYNAMIC_APPLY(o.type_, FB_X); +#undef FB_X + type_ = o.type_; + } + return *this; +} + +inline dynamic& dynamic::operator=(dynamic&& o) { + if (&o != this) { + destroy(); +#define FB_X(T) new (getAddress()) T(std::move(*o.getAddress())) + FB_DYNAMIC_APPLY(o.type_, FB_X); +#undef FB_X + type_ = o.type_; + } + return *this; +} + +inline dynamic& dynamic::operator[](dynamic const& k) { + if (!isObject() && !isArray()) { + throw TypeError("object/array", type()); + } + if (isArray()) { + return at(k); + } + auto& obj = get(); + auto ret = obj.insert({k, nullptr}); + return ret.first->second; +} + +inline dynamic const& dynamic::operator[](dynamic const& idx) const { + return at(idx); +} + +inline dynamic dynamic::getDefault(const dynamic& k, const dynamic& v) const { + auto& obj = get(); + auto it = obj.find(k); + return it == obj.end() ? 
v : it->second; +} + +inline dynamic&& dynamic::getDefault(const dynamic& k, dynamic&& v) const { + auto& obj = get(); + auto it = obj.find(k); + if (it != obj.end()) { + v = it->second; + } + + return std::move(v); +} + +template inline dynamic& dynamic::setDefault(K&& k, V&& v) { + auto& obj = get(); + return obj.insert(std::make_pair(std::forward(k), + std::forward(v))).first->second; +} + +inline dynamic const& dynamic::at(dynamic const& idx) const { + return const_cast(this)->at(idx); +} + +inline dynamic& dynamic::at(dynamic const& idx) { + if (!isObject() && !isArray()) { + throw TypeError("object/array", type()); + } + + if (auto* parray = get_nothrow()) { + if (idx >= parray->size()) { + throw std::out_of_range("out of range in dynamic array"); + } + if (!idx.isInt()) { + throw TypeError("int64", idx.type()); + } + return (*parray)[idx.asInt()]; + } + + auto* pobj = get_nothrow(); + assert(pobj); + auto it = find(idx); + if (it == items().end()) { + throw std::out_of_range(to( + "couldn't find key ", idx.asString(), " in dynamic object")); + } + return const_cast(it->second); +} + +inline bool dynamic::empty() const { + if (isNull()) { + return true; + } + return !size(); +} + +inline std::size_t dynamic::size() const { + if (auto* ar = get_nothrow()) { + return ar->size(); + } + if (auto* obj = get_nothrow()) { + return obj->size(); + } + if (auto* str = get_nothrow()) { + return str->size(); + } + throw TypeError("array/object", type()); +} + +inline std::size_t dynamic::count(dynamic const& key) const { + return find(key) != items().end(); +} + +inline dynamic::const_item_iterator dynamic::find(dynamic const& key) const { + return get().find(key); +} + +template inline void dynamic::insert(K&& key, V&& val) { + auto& obj = get(); + auto rv = obj.insert(std::make_pair(std::forward(key), + std::forward(val))); + if (!rv.second) { + // note, the second use of std:forward(val) is only correct + // if the first one did not result in a move. 
obj[key] = val + // would be preferrable but doesn't compile because dynamic + // is (intentionally) not default constructable + rv.first->second = std::forward(val); + } +} + +inline std::size_t dynamic::erase(dynamic const& key) { + auto& obj = get(); + return obj.erase(key); +} + +inline dynamic::const_iterator dynamic::erase(const_iterator it) { + return get().erase(it); +} + +inline dynamic::const_iterator +dynamic::erase(const_iterator first, const_iterator last) { + return get().erase(first, last); +} + +inline dynamic::const_key_iterator dynamic::erase(const_key_iterator it) { + return const_key_iterator(get().erase(it.base())); +} + +inline dynamic::const_key_iterator dynamic::erase(const_key_iterator first, + const_key_iterator last) { + return const_key_iterator(get().erase(first.base(), + last.base())); +} + +inline dynamic::const_value_iterator dynamic::erase(const_value_iterator it) { + return const_value_iterator(get().erase(it.base())); +} + +inline dynamic::const_value_iterator dynamic::erase(const_value_iterator first, + const_value_iterator last) { + return const_value_iterator(get().erase(first.base(), + last.base())); +} + +inline dynamic::const_item_iterator dynamic::erase(const_item_iterator it) { + return const_item_iterator(get().erase(it.base())); +} + +inline dynamic::const_item_iterator dynamic::erase(const_item_iterator first, + const_item_iterator last) { + return const_item_iterator(get().erase(first.base(), + last.base())); +} + +inline void dynamic::resize(std::size_t sz, dynamic const& c) { + auto& array = get(); + array.resize(sz, c); +} + +inline void dynamic::push_back(dynamic const& v) { + auto& array = get(); + array.push_back(v); +} + +inline void dynamic::push_back(dynamic&& v) { + auto& array = get(); + array.push_back(std::move(v)); +} + +inline std::size_t dynamic::hash() const { + switch (type()) { + case OBJECT: + case ARRAY: + case NULLT: + throw TypeError("not null/object/array", type()); + case INT64: + return std::hash()(asInt()); + case DOUBLE: + return std::hash()(asDouble()); + case BOOL: + return std::hash()(asBool()); + case STRING: + return std::hash()(asString()); + default: + CHECK(0); abort(); + } +} + +////////////////////////////////////////////////////////////////////// + +template struct dynamic::TypeInfo { + static char const name[]; + static Type const type; +}; + +template +T dynamic::asImpl() const { + switch (type()) { + case INT64: return to(*get_nothrow()); + case DOUBLE: return to(*get_nothrow()); + case BOOL: return to(*get_nothrow()); + case STRING: return to(*get_nothrow()); + default: + throw TypeError("int/double/bool/string", type()); + } +} + +// Return a T* to our type, or null if we're not that type. +template +T* dynamic::get_nothrow() { + if (type_ != TypeInfo::type) { + return nullptr; + } + return getAddress(); +} + +template +T const* dynamic::get_nothrow() const { + return const_cast(this)->get_nothrow(); +} + +// Return T* for where we can put a T, without type checking. (Memory +// might be uninitialized, even.) 
+template +T* dynamic::getAddress() { + return GetAddrImpl::get(u_); +} + +template +T const* dynamic::getAddress() const { + return const_cast(this)->getAddress(); +} + +template struct dynamic::GetAddrImpl {}; +template<> struct dynamic::GetAddrImpl { + static void** get(Data& d) { return &d.nul; } +}; +template<> struct dynamic::GetAddrImpl { + static Array* get(Data& d) { return &d.array; } +}; +template<> struct dynamic::GetAddrImpl { + static bool* get(Data& d) { return &d.boolean; } +}; +template<> struct dynamic::GetAddrImpl { + static int64_t* get(Data& d) { return &d.integer; } +}; +template<> struct dynamic::GetAddrImpl { + static double* get(Data& d) { return &d.doubl; } +}; +template<> struct dynamic::GetAddrImpl { + static fbstring* get(Data& d) { return &d.string; } +}; +template<> struct dynamic::GetAddrImpl { + static_assert(sizeof(ObjectImpl) <= sizeof(Data::objectBuffer), + "In your implementation, std::unordered_map<> apparently takes different" + " amount of space depending on its template parameters. This is " + "weird. Make objectBuffer bigger if you want to compile dynamic."); + + static ObjectImpl* get(Data& d) { + void* data = &d.objectBuffer; + return static_cast(data); + } +}; + +template +T& dynamic::get() { + if (auto* p = get_nothrow()) { + return *p; + } + throw TypeError(TypeInfo::name, type()); +} + +template +T const& dynamic::get() const { + return const_cast(this)->get(); +} + +inline char const* dynamic::typeName(Type t) { +#define FB_X(T) return TypeInfo::name + FB_DYNAMIC_APPLY(t, FB_X); +#undef FB_X +} + +inline void dynamic::destroy() { + // This short-circuit speeds up some microbenchmarks. + if (type_ == NULLT) return; + +#define FB_X(T) detail::Destroy::destroy(getAddress()) + FB_DYNAMIC_APPLY(type_, FB_X); +#undef FB_X + type_ = NULLT; + u_.nul = nullptr; +} + +////////////////////////////////////////////////////////////////////// + +/* + * Helper for implementing operator<<. Throws if the type shouldn't + * support it. 
+ */
+template<class T>
+struct dynamic::PrintImpl {
+  static void print(dynamic const&, std::ostream& out, T const& t) {
+    out << t;
+  }
+};
+template<>
+struct dynamic::PrintImpl<dynamic::ObjectImpl> {
+  static void print(dynamic const& d,
+                    std::ostream& out,
+                    dynamic::ObjectImpl const&) {
+    d.print_as_pseudo_json(out);
+  }
+};
+template<>
+struct dynamic::PrintImpl<dynamic::Array> {
+  static void print(dynamic const& d,
+                    std::ostream& out,
+                    dynamic::Array const&) {
+    d.print_as_pseudo_json(out);
+  }
+};
+
+inline void dynamic::print(std::ostream& out) const {
+#define FB_X(T) PrintImpl<T>::print(*this, out, *getAddress<T>())
+  FB_DYNAMIC_APPLY(type_, FB_X);
+#undef FB_X
+}
+
+inline std::ostream& operator<<(std::ostream& out, dynamic const& d) {
+  d.print(out);
+  return out;
+}
+
+//////////////////////////////////////////////////////////////////////
+
+// Specialization of FormatValue so dynamic objects can be formatted
+template <>
+class FormatValue<dynamic> {
+ public:
+  explicit FormatValue(const dynamic& val) : val_(val) { }
+
+  template <class FormatCallback>
+  void format(FormatArg& arg, FormatCallback& cb) const {
+    switch (val_.type()) {
+    case dynamic::NULLT:
+      FormatValue<std::nullptr_t>(nullptr).format(arg, cb);
+      break;
+    case dynamic::BOOL:
+      FormatValue<bool>(val_.asBool()).format(arg, cb);
+      break;
+    case dynamic::INT64:
+      FormatValue<int64_t>(val_.asInt()).format(arg, cb);
+      break;
+    case dynamic::STRING:
+      FormatValue<fbstring>(val_.asString()).format(arg, cb);
+      break;
+    case dynamic::DOUBLE:
+      FormatValue<double>(val_.asDouble()).format(arg, cb);
+      break;
+    case dynamic::ARRAY:
+      FormatValue<dynamic>(val_.at(arg.splitIntKey())).format(arg, cb);
+      break;
+    case dynamic::OBJECT:
+      FormatValue<dynamic>(val_.at(arg.splitKey().toFbstring())).format(arg, cb);
+      break;
+    }
+  }
+
+ private:
+  const dynamic& val_;
+};
+
+}
+
+#undef FB_DYNAMIC_APPLY
+
+#endif
diff --git a/folly/dynamic.cpp b/folly/dynamic.cpp
new file mode 100644
index 00000000..86f20b12
--- /dev/null
+++ b/folly/dynamic.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/dynamic.h"
+
+namespace folly {
+
+//////////////////////////////////////////////////////////////////////
+
+#define DEF_TYPE(T, str, typen) \
+  template<> char const dynamic::TypeInfo<T>::name[] = str; \
+  template<> dynamic::Type const dynamic::TypeInfo<T>::type = typen
+
+DEF_TYPE(void*, "null", dynamic::NULLT);
+DEF_TYPE(bool, "boolean", dynamic::BOOL);
+DEF_TYPE(fbstring, "string", dynamic::STRING);
+DEF_TYPE(dynamic::Array, "array", dynamic::ARRAY);
+DEF_TYPE(double, "double", dynamic::DOUBLE);
+DEF_TYPE(int64_t, "int64", dynamic::INT64);
+DEF_TYPE(dynamic::ObjectImpl, "object", dynamic::OBJECT);
+
+#undef DEF_TYPE
+
+//////////////////////////////////////////////////////////////////////
+
+}
+
diff --git a/folly/dynamic.h b/folly/dynamic.h
new file mode 100644
index 00000000..6a5fe0bd
--- /dev/null
+++ b/folly/dynamic.h
@@ -0,0 +1,485 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This is a runtime dynamically typed value. It holds types from a + * specific predetermined set of types (ints, bools, arrays, etc). In + * particular, it can be used as a convenient in-memory representation + * for complete json objects. + * + * In general you can try to use these objects as if they were the + * type they represent (although in some cases with a slightly less + * complete interface than the raw type), and it'll just throw a + * TypeError if it is used in an illegal way. + * + * Some examples: + * + * dynamic twelve = 12; + * dynamic str = "string"; + * dynamic map = dynamic::object; + * map[str] = twelve; + * map[str + "another_str"] = { "array", "of", 4, "elements" }; + * map.insert("null_element", nullptr); + * ++map[str]; + * assert(map[str] == 13); + * + * // Building a complex object with a sub array inline: + * dynamic d = dynamic::object + * ("key", "value") + * ("key2", { "a", "array" }) + * ; + * + * Also see folly/json.h for the serialization and deserialization + * functions for JSON. + * + * Note: dynamic is not DefaultConstructible. Rationale: + * + * - The intuitive thing to initialize a defaulted dynamic to would + * be nullptr. + * + * - However, the expression dynamic d = {} is required to call the + * default constructor by the standard, which is confusing + * behavior for dynamic unless the default constructor creates an + * empty array. + * + * Additional documentation is in folly/docs/Dynamic.md. + * + * @author Jordan DeLong + */ + +#ifndef FOLLY_DYNAMIC_H_ +#define FOLLY_DYNAMIC_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "folly/Traits.h" +#include "folly/FBVector.h" +#include "folly/FBString.h" + +namespace folly { + +////////////////////////////////////////////////////////////////////// + +struct dynamic; +struct TypeError; +template<> FOLLY_ASSUME_RELOCATABLE(dynamic); + +////////////////////////////////////////////////////////////////////// + +struct dynamic : private boost::operators { + enum Type { + NULLT, + ARRAY, + BOOL, + DOUBLE, + INT64, + OBJECT, + STRING, + }; + + /* + * We support direct iteration of arrays, and indirect iteration of objects. + * See begin(), end(), keys(), values(), and items() for more. + * + * Array iterators dereference as the elements in the array. + * Object key iterators dereference as the keys in the object. + * Object value iterators dereference as the values in the object. + * Object item iterators dereference as pairs of (key, value). + */ +private: + typedef fbvector Array; +public: + typedef Array::const_iterator const_iterator; + struct const_key_iterator; + struct const_value_iterator; + struct const_item_iterator; + + /* + * Creation routines for making dynamic objects. Objects are maps + * from key to value (so named due to json-related origins here). 
+ * + * Example: + * + * // Make a fairly complex dynamic: + * dynamic d = dynamic::object("key", "value1") + * ("key2", { "value", "with", 4, "words" }); + * + * // Build an object in a few steps: + * dynamic d = dynamic::object; + * d["key"] = 12; + * d["something_else"] = { 1, 2, 3, nullptr }; + */ +private: + struct ObjectMaker; + +public: + template static ObjectMaker object(Args&&...); + + /* + * String compatibility constructors. + */ + /* implicit */ dynamic(char const* val); + /* implicit */ dynamic(std::string const& val); + + /* + * This is part of the plumbing for object(), above. Used to create + * a new object dynamic. + */ + /* implicit */ dynamic(ObjectMaker (*)()); + /* implicit */ dynamic(ObjectMaker const&) = delete; + /* implicit */ dynamic(ObjectMaker&&); + + /* + * Create a new array from an initializer list. + * + * For example: + * + * dynamic v = { 1, 2, 3, "foo" }; + */ + /* implicit */ dynamic(std::initializer_list il); + + /* + * Conversion constructors from most of the other types. + */ + template /* implicit */ dynamic(T t); + + /* + * Create a dynamic that is an array of the values from the supplied + * iterator range. + */ + template dynamic(Iterator first, Iterator last); + + dynamic(dynamic const&); + dynamic(dynamic&&); + ~dynamic(); + + /* + * "Deep" equality comparison. This will compare all the way down + * an object or array, and is potentially expensive. + */ + bool operator==(dynamic const& o) const; + + /* + * For all types except object this returns the natural ordering on + * those types. For objects, we throw TypeError. + */ + bool operator<(dynamic const& o) const; + + /* + * General operators. + * + * These throw TypeError when used with types or type combinations + * that don't support them. + * + * These functions may also throw if you use 64-bit integers with + * doubles when the integers are too big to fit in a double. + */ + dynamic& operator+=(dynamic const&); + dynamic& operator-=(dynamic const&); + dynamic& operator*=(dynamic const&); + dynamic& operator/=(dynamic const&); + dynamic& operator%=(dynamic const&); + dynamic& operator|=(dynamic const&); + dynamic& operator&=(dynamic const&); + dynamic& operator^=(dynamic const&); + dynamic& operator++(); + dynamic& operator--(); + + /* + * Assignment from other dynamics. Because of the implicit conversion + * to dynamic from its potential types, you can use this to change the + * type pretty intuitively. + * + * Basic guarantee only. + */ + dynamic& operator=(dynamic const&); + dynamic& operator=(dynamic&&); + + /* + * For simple dynamics (not arrays or objects), this prints the + * value to an std::ostream in the expected way. Respects the + * formatting manipulators that have been sent to the stream + * already. + * + * If the dynamic holds an object or array, this prints them in a + * format very similar to JSON. (It will in fact actually be JSON + * as long as the dynamic validly represents a JSON object---i.e. it + * can't have non-string keys.) + */ + friend std::ostream& operator<<(std::ostream&, dynamic const&); + + /* + * Returns true if this dynamic is of the specified type. + */ + bool isString() const; + bool isObject() const; + bool isBool() const; + bool isNull() const; + bool isArray() const; + bool isDouble() const; + bool isInt() const; + + /* + * Returns: isInt() || isDouble(). + */ + bool isNumber() const; + + /* + * Returns the type of this dynamic. + */ + Type type() const; + + /* + * Extract a value while trying to convert to the specified type. 
+ * Throws exceptions if we cannot convert from the real type to the + * requested type. + * + * Note you can only use this to access integral types or strings, + * since arrays and objects are generally best delt with as a + * dynamic. + */ + fbstring asString() const; + double asDouble() const; + int64_t asInt() const; + bool asBool() const; + + /* + * Returns: true if this dynamic is null, an empty array, an empty + * object, or an empty string. + */ + bool empty() const; + + /* + * If this is an array or an object, returns the number of elements + * contained. If it is a string, returns the length. Otherwise + * throws TypeError. + */ + std::size_t size() const; + + /* + * You can iterate over the values of the array. Calling these on + * non-arrays will throw a TypeError. + */ + const_iterator begin() const; + const_iterator end() const; + +private: + /* + * Helper object returned by keys(), values(), and items(). + */ + template struct IterableProxy; + +public: + /* + * You can iterate over the keys, values, or items (std::pair of key and + * value) in an object. Calling these on non-objects will throw a TypeError. + */ + IterableProxy keys() const; + IterableProxy values() const; + IterableProxy items() const; + + /* + * AssociativeContainer-style find interface for objects. Throws if + * this is not an object. + * + * Returns: end() if the key is not present, or an iterator pointing + * to the item. + */ + const_item_iterator find(dynamic const&) const; + + /* + * If this is an object, returns whether it contains a field with + * the given name. Otherwise throws TypeError. + */ + std::size_t count(dynamic const&) const; + + /* + * For objects or arrays, provides access to sub-fields by index or + * field name. + * + * Using these with dynamic objects that are not arrays or objects + * will throw a TypeError. Using an index that is out of range or + * object-element that's not present throws std::out_of_range. + */ + dynamic const& at(dynamic const&) const; + dynamic& at(dynamic const&); + + /* + * This works for access to both objects and arrays. + * + * In the case of an array, the index must be an integer, and this will throw + * std::out_of_range if it is less than zero or greater than size(). + * + * In the case of an object, the non-const overload inserts a null + * value if the key isn't present. The const overload will throw + * std::out_of_range if the key is not present. + * + * These functions do not invalidate iterators. + */ + dynamic& operator[](dynamic const&); + dynamic const& operator[](dynamic const&) const; + + /* + * Only defined for objects, throws TypeError otherwise. + * + * getDefault will return the value associated with the supplied key, the + * supplied default otherwise. setDefault will set the key to the supplied + * default if it is not yet set, otherwise leaving it. setDefault returns + * a reference to the existing value if present, the new value otherwise. + */ + dynamic + getDefault(const dynamic& k, const dynamic& v = dynamic::object) const; + dynamic&& getDefault(const dynamic& k, dynamic&& v) const; + template + dynamic& setDefault(K&& k, V&& v = dynamic::object); + + /* + * Resizes an array so it has at n elements, using the supplied + * default to fill new elements. Throws TypeError if this dynamic + * is not an array. + * + * May invalidate iterators. + * + * Post: size() == n + */ + void resize(std::size_t n, dynamic const& = nullptr); + + /* + * Inserts the supplied key-value pair to an object, or throws if + * it's not an object. 
+ * + * Invalidates iterators. + */ + template void insert(K&&, V&& val); + + /* + * Erase an element from a dynamic object, by key. + * + * Invalidates iterators to the element being erased. + * + * Returns the number of elements erased (i.e. 1 or 0). + */ + std::size_t erase(dynamic const& key); + + /* + * Erase an element from a dynamic object or array, using an + * iterator or an iterator range. + * + * In arrays, invalidates iterators to elements after the element + * being erased. In objects, invalidates iterators to the elements + * being erased. + * + * Returns a new iterator to the first element beyond any elements + * removed, or end() if there are none. (The iteration order does + * not change.) + */ + const_iterator erase(const_iterator it); + const_iterator erase(const_iterator first, const_iterator last); + + const_key_iterator erase(const_key_iterator it); + const_key_iterator erase(const_key_iterator first, const_key_iterator last); + + const_value_iterator erase(const_value_iterator it); + const_value_iterator erase(const_value_iterator first, + const_value_iterator last); + + const_item_iterator erase(const_item_iterator it); + const_item_iterator erase(const_item_iterator first, + const_item_iterator last); + /* + * Append elements to an array. If this is not an array, throws + * TypeError. + * + * Invalidates iterators. + */ + void push_back(dynamic const&); + void push_back(dynamic&&); + + /* + * Get a hash code. This function is called by a std::hash<> + * specialization, also. + * + * Throws TypeError if this is an object, array, or null. + */ + std::size_t hash() const; + +private: + friend struct TypeError; + struct ObjectImpl; + struct ObjectMaker; + template struct TypeInfo; + template struct CompareOp; + template struct GetAddrImpl; + template struct PrintImpl; + + template T const& get() const; + template T& get(); + template T* get_nothrow(); + template T const* get_nothrow() const; + template T* getAddress(); + template T const* getAddress() const; + + template T asImpl() const; + + static char const* typeName(Type); + void destroy(); + void print(std::ostream&) const; + void print_as_pseudo_json(std::ostream&) const; // see json.cpp + +private: + Type type_; + union Data { + explicit Data() : nul(nullptr) {} + ~Data() {} + + // XXX: gcc does an ICE if we use std::nullptr_t instead of void* + // here. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50361 + void* nul; + Array array; + bool boolean; + double doubl; + int64_t integer; + fbstring string; + + /* + * Objects are placement new'd here. We have to use a char buffer + * because we don't know the type here (std::unordered_map<> with + * dynamic would be parameterizing a std:: template with an + * incomplete type right now). (Note that in contrast we know it + * is ok to do this with fbvector because we own it.) + */ + typename std::aligned_storage< + sizeof(std::unordered_map), + alignof(std::unordered_map) + >::type objectBuffer; + } u_; +}; + +////////////////////////////////////////////////////////////////////// + +} + +#include "folly/dynamic-inl.h" + +#endif diff --git a/folly/eventfd.h b/folly/eventfd.h new file mode 100644 index 00000000..fdd895d6 --- /dev/null +++ b/folly/eventfd.h @@ -0,0 +1,79 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Wrapper around the eventfd system call, as defined in + * in glibc 2.9+. + * + * @author Tudor Bosman (tudorb@fb.com) + */ + +#ifndef FOLLY_BASE_EVENTFD_H_ +#define FOLLY_BASE_EVENTFD_H_ + +#ifndef __linux__ +#error This file may be compiled on Linux only. +#endif + +#include +#include +#include + +// Use existing __NR_eventfd2 if already defined +// Values from the Linux kernel source: +// arch/x86/include/asm/unistd_{32,64}.h +#ifndef __NR_eventfd2 +#if defined(__x86_64__) +#define __NR_eventfd2 290 +#elif defined(__i386__) +#define __NR_eventfd2 328 +#else +#error "Can't define __NR_eventfd2 for your architecture." +#endif +#endif + +#ifndef EFD_SEMAPHORE +#define EFD_SEMAPHORE 1 +#endif + +/* from linux/fcntl.h - this conflicts with fcntl.h so include just the #define + * we need + */ +#ifndef O_CLOEXEC +#define O_CLOEXEC 02000000 /* set close_on_exec */ +#endif + +#ifndef EFD_CLOEXEC +#define EFD_CLOEXEC O_CLOEXEC +#endif + +#ifndef EFD_NONBLOCK +#define EFD_NONBLOCK O_NONBLOCK +#endif + +namespace folly { + +// http://www.kernel.org/doc/man-pages/online/pages/man2/eventfd.2.html +inline int eventfd(unsigned int initval, int flags) { + // Use the eventfd2 system call, as in glibc 2.9+ + // (requires kernel 2.6.30+) + return syscall(__NR_eventfd2, initval, flags); +} + +} // namespace folly + +#endif /* FOLLY_BASE_EVENTFD_H_ */ + diff --git a/folly/experimental/Bits.h b/folly/experimental/Bits.h new file mode 100644 index 00000000..31b12376 --- /dev/null +++ b/folly/experimental/Bits.h @@ -0,0 +1,134 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_EXPERIMENTAL_BITS_H_ +#define FOLLY_EXPERIMENTAL_BITS_H_ + +#include +#include +#include + +#include "folly/Range.h" + +namespace folly { + +/** + * Population count (number of bits set), using __builtin_popcount or + * __builtin_popcountll, depending on size. 
+ */ +template +inline typename std::enable_if< + (std::is_integral::value && + std::is_unsigned::value && + sizeof(T) <= sizeof(unsigned int)), + size_t>::type + popcount(T x) { + return __builtin_popcount(x); +} + +template +inline typename std::enable_if< + (std::is_integral::value && + std::is_unsigned::value && + sizeof(T) > sizeof(unsigned int) && + sizeof(T) <= sizeof(unsigned long long)), + size_t>::type + popcount(T x) { + return __builtin_popcountll(x); +} + +template +struct Bits { + static_assert(std::is_integral::value && + std::is_unsigned::value, + "Unsigned integral type required"); + + typedef T type; + static constexpr size_t bitsPerBlock = std::numeric_limits::digits; + + /** + * Byte index of the given bit. + */ + static constexpr size_t blockIndex(size_t bit) { + return bit / bitsPerBlock; + } + + /** + * Offset in block of the given bit. + */ + static constexpr size_t bitOffset(size_t bit) { + return bit % bitsPerBlock; + } + + /** + * Number of blocks used by the given number of bits. + */ + static constexpr size_t blockCount(size_t nbits) { + return nbits / bitsPerBlock + (nbits % bitsPerBlock != 0); + } + + /** + * Set the given bit. + */ + static void set(T* p, size_t bit); + + /** + * Clear the given bit. + */ + static void clear(T* p, size_t bit); + + /** + * Test the given bit. + */ + static bool test(const T* p, size_t bit); + + /** + * Count the number of bits set in a range of blocks. + */ + static size_t count(const T* begin, const T* end); + + private: + static constexpr T one = T(1); +}; + +template +inline void Bits::set(T* p, size_t bit) { + p[blockIndex(bit)] |= (one << bitOffset(bit)); +} + +template +inline void Bits::clear(T* p, size_t bit) { + p[blockIndex(bit)] &= ~(one << bitOffset(bit)); +} + +template +inline bool Bits::test(const T* p, size_t bit) { + return p[blockIndex(bit)] & (one << bitOffset(bit)); +} + +template +inline size_t Bits::count(const T* begin, const T* end) { + size_t n = 0; + for (; begin != end; ++begin) { + n += popcount(*begin); + } + return n; +} + +} // namespace folly + +#endif /* FOLLY_EXPERIMENTAL_BITS_H_ */ + diff --git a/folly/experimental/TestUtil.cpp b/folly/experimental/TestUtil.cpp new file mode 100644 index 00000000..d9bcd042 --- /dev/null +++ b/folly/experimental/TestUtil.cpp @@ -0,0 +1,97 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/experimental/TestUtil.h" + +#include +#include +#include +#include + +#include "folly/Format.h" + +namespace folly { +namespace test { + +TemporaryFile::TemporaryFile(const char* prefix, Scope scope, + bool closeOnDestruction) + : scope_(scope), + closeOnDestruction_(closeOnDestruction) { + static const char* suffix = ".XXXXXX"; // per mkstemp(3) + if (!prefix || prefix[0] == '\0') { + prefix = "temp"; + } + const char* dir = nullptr; + if (!strchr(prefix, '/')) { + // Not a full path, try getenv("TMPDIR") or "/tmp" + dir = getenv("TMPDIR"); + if (!dir) { + dir = "/tmp"; + } + // The "!" 
is a placeholder to ensure that &(path[0]) is null-terminated. + // This is the only standard-compliant way to get at a null-terminated + // non-const char string inside a std::string: put the null-terminator + // yourself. + path_ = format("{}/{}{}!", dir, prefix, suffix).str(); + } else { + path_ = format("{}{}!", prefix, suffix).str(); + } + + // Replace the '!' with a null terminator, we'll get rid of it later + path_[path_.size() - 1] = '\0'; + + fd_ = mkstemp(&(path_[0])); + if (fd_ == -1) { + throw std::system_error(errno, std::system_category(), + format("mkstemp failed: {}", path_).str().c_str()); + } + + DCHECK_EQ(path_[path_.size() - 1], '\0'); + path_.erase(path_.size() - 1); + + if (scope_ == Scope::UNLINK_IMMEDIATELY) { + if (unlink(path_.c_str()) == -1) { + throw std::system_error(errno, std::system_category(), + format("unlink failed: {}", path_).str().c_str()); + } + path_.clear(); // path no longer available or meaningful + } +} + +const std::string& TemporaryFile::path() const { + CHECK(scope_ != Scope::UNLINK_IMMEDIATELY); + DCHECK(!path_.empty()); + return path_; +} + +TemporaryFile::~TemporaryFile() { + if (fd_ != -1 && closeOnDestruction_) { + if (close(fd_) == -1) { + PLOG(ERROR) << "close failed"; + } + } + + // If we previously failed to unlink() (UNLINK_IMMEDIATELY), we'll + // try again here. + if (scope_ != Scope::PERMANENT && !path_.empty()) { + if (unlink(path_.c_str()) == -1) { + PLOG(ERROR) << "unlink(" << path_ << ") failed"; + } + } +} + +} // namespace test +} // namespace folly diff --git a/folly/experimental/TestUtil.h b/folly/experimental/TestUtil.h new file mode 100644 index 00000000..74d67a97 --- /dev/null +++ b/folly/experimental/TestUtil.h @@ -0,0 +1,63 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_TESTUTIL_H_ +#define FOLLY_TESTUTIL_H_ + +#include + +namespace folly { +namespace test { + +/** + * Temporary file. + * + * By default, the file is created in a system-specific location (the value + * of the TMPDIR environment variable, or /tmp), but you can override that + * by making "prefix" be a path (containing a '/'; use a prefix starting with + * './' to create a file in the current directory). + * + * By default, the file is closed and deleted when the TemporaryFile object + * is destroyed, but both these behaviors can be overridden with arguments + * to the constructor. 
+ */ +class TemporaryFile { + public: + enum class Scope { + PERMANENT, + UNLINK_IMMEDIATELY, + UNLINK_ON_DESTRUCTION + }; + explicit TemporaryFile(const char* prefix=nullptr, + Scope scope=Scope::UNLINK_ON_DESTRUCTION, + bool closeOnDestruction=true); + ~TemporaryFile(); + + int fd() const { return fd_; } + const std::string& path() const; + + private: + Scope scope_; + bool closeOnDestruction_; + int fd_; + std::string path_; +}; + +} // namespace test +} // namespace folly + +#endif /* FOLLY_TESTUTIL_H_ */ + diff --git a/folly/experimental/io/Cursor.h b/folly/experimental/io/Cursor.h new file mode 100644 index 00000000..fbd77e5a --- /dev/null +++ b/folly/experimental/io/Cursor.h @@ -0,0 +1,411 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_CURSOR_H +#define FOLLY_CURSOR_H + +#include +#include +#include +#include +#include + +#include "folly/Bits.h" +#include "folly/experimental/io/IOBuf.h" +#include "folly/Likely.h" + +/** + * Cursor class for fast iteration over IOBuf chains. + * + * Cursor - Read-only access + * + * RWPrivateCursor - Read-write access, assumes private access to IOBuf chain + * RWUnshareCursor - Read-write access, calls unshare on write (COW) + * Appender - Write access, assumes private access to IOBuf chian + * + * Note that RW cursors write in the preallocated part of buffers (that is, + * between the buffer's data() and tail()), while Appenders append to the end + * of the buffer (between the buffer's tail() and bufferEnd()). Appenders + * automatically adjust the buffer pointers, so you may only use one + * Appender with a buffer chain; for this reason, Appenders assume private + * access to the buffer (you need to call unshare() yourself if necessary). + **/ +namespace folly { namespace io { +namespace detail { + +template +class CursorBase { + public: + const uint8_t* data() const { + return crtBuf_->data() + offset_; + } + + // Space available in the current IOBuf. May be 0; use peek() instead which + // will always point to a non-empty chunk of data or at the end of the + // chain. + size_t length() const { + return crtBuf_->length() - offset_; + } + + Derived& operator+=(size_t offset) { + Derived* p = static_cast(this); + p->skip(offset); + return *p; + } + + template + typename std::enable_if::value, T>::type + read() { + T val; + pull(&val, sizeof(T)); + return val; + } + + template + T readBE() { + return Endian::big(read()); + } + + template + T readLE() { + return Endian::little(read()); + } + + explicit CursorBase(BufType* buf) + : crtBuf_(buf) + , offset_(0) + , buffer_(buf) {} + + // Make all the templated classes friends for copy constructor. + template friend class CursorBase; + + template + explicit CursorBase(const T& cursor) { + crtBuf_ = cursor.crtBuf_; + offset_ = cursor.offset_; + buffer_ = cursor.buffer_; + } + + // reset cursor to point to a new buffer. 
+ void reset(BufType* buf) { + crtBuf_ = buf; + buffer_ = buf; + offset_ = 0; + } + + /** + * Return the available data in the current buffer. + * If you want to gather more data from the chain into a contiguous region + * (for hopefully zero-copy access), use gather() before peek(). + */ + std::pair peek() { + // Ensure that we're pointing to valid data + size_t available = length(); + while (UNLIKELY(available == 0 && tryAdvanceBuffer())) { + available = length(); + } + + return std::make_pair(data(), available); + } + + void pull(void* buf, size_t length) { + if (UNLIKELY(pullAtMost(buf, length) != length)) { + throw std::out_of_range("underflow"); + } + } + + void skip(size_t length) { + if (UNLIKELY(skipAtMost(length) != length)) { + throw std::out_of_range("underflow"); + } + } + + size_t pullAtMost(void* buf, size_t len) { + uint8_t* p = reinterpret_cast(buf); + size_t copied = 0; + for (;;) { + // Fast path: it all fits in one buffer. + size_t available = length(); + if (LIKELY(available >= len)) { + memcpy(p, data(), len); + offset_ += len; + return copied + len; + } + + memcpy(p, data(), available); + copied += available; + if (UNLIKELY(!tryAdvanceBuffer())) { + return copied; + } + p += available; + len -= available; + } + } + + size_t skipAtMost(size_t len) { + size_t skipped = 0; + for (;;) { + // Fast path: it all fits in one buffer. + size_t available = length(); + if (LIKELY(available >= len)) { + offset_ += len; + return skipped + len; + } + + skipped += available; + if (UNLIKELY(!tryAdvanceBuffer())) { + return skipped; + } + len -= available; + } + } + + protected: + BufType* crtBuf_; + size_t offset_; + + ~CursorBase(){} + + bool tryAdvanceBuffer() { + BufType* nextBuf = crtBuf_->next(); + if (UNLIKELY(nextBuf == buffer_)) { + offset_ = crtBuf_->length(); + return false; + } + + offset_ = 0; + crtBuf_ = nextBuf; + static_cast(this)->advanceDone(); + return true; + } + + private: + void advanceDone() { + } + + BufType* buffer_; +}; + +template +class Writable { + public: + template + typename std::enable_if::value>::type + write(T value) { + const uint8_t* u8 = reinterpret_cast(&value); + push(u8, sizeof(T)); + } + + template + void writeBE(T value) { + write(Endian::big(value)); + } + + template + void writeLE(T value) { + write(Endian::little(value)); + } + + void push(const uint8_t* buf, size_t len) { + Derived* d = static_cast(this); + if (d->pushAtMost(buf, len) != len) { + throw std::out_of_range("overflow"); + } + } +}; + +} // namespace detail + +class Cursor : public detail::CursorBase { + public: + explicit Cursor(const IOBuf* buf) + : detail::CursorBase(buf) {} + + template + explicit Cursor(CursorType& cursor) + : detail::CursorBase(cursor) {} +}; + +enum class CursorAccess { + PRIVATE, + UNSHARE +}; + +template +class RWCursor + : public detail::CursorBase, IOBuf>, + public detail::Writable> { + friend class detail::CursorBase, IOBuf>; + public: + explicit RWCursor(IOBuf* buf) + : detail::CursorBase, IOBuf>(buf), + maybeShared_(true) {} + + template + explicit RWCursor(CursorType& cursor) + : detail::CursorBase, IOBuf>(cursor), + maybeShared_(true) {} + /** + * Gather at least n bytes contiguously into the current buffer, + * by coalescing subsequent buffers from the chain as necessary. + */ + void gather(size_t n) { + this->crtBuf_->gather(this->offset_ + n); + } + + size_t pushAtMost(const uint8_t* buf, size_t len) { + size_t copied = 0; + for (;;) { + // Fast path: the current buffer is big enough. 
+ size_t available = this->length(); + if (LIKELY(available >= len)) { + if (access == CursorAccess::UNSHARE) { + maybeUnshare(); + } + memcpy(writableData(), buf, len); + this->offset_ += len; + return copied + len; + } + + if (access == CursorAccess::UNSHARE) { + maybeUnshare(); + } + memcpy(writableData(), buf, available); + copied += available; + if (UNLIKELY(!this->tryAdvanceBuffer())) { + return copied; + } + buf += available; + len -= available; + } + } + + uint8_t* writableData() { + return this->crtBuf_->writableData() + this->offset_; + } + + private: + void maybeUnshare() { + if (UNLIKELY(maybeShared_)) { + this->crtBuf_->unshareOne(); + maybeShared_ = false; + } + } + + void advanceDone() { + maybeShared_ = true; + } + + bool maybeShared_; +}; + +typedef RWCursor RWPrivateCursor; +typedef RWCursor RWUnshareCursor; + +/** + * Append to the end of a buffer chain, growing the chain (by allocating new + * buffers) in increments of at least growth bytes every time. Won't grow + * (and push() and ensure() will throw) if growth == 0. + * + * TODO(tudorb): add a flavor of Appender that reallocates one IOBuf instead + * of chaining. + */ +class Appender : public detail::Writable { + public: + Appender(IOBuf* buf, uint32_t growth) + : buffer_(buf), + crtBuf_(buf->prev()), + growth_(growth) { + } + + uint8_t* writableData() { + return crtBuf_->writableTail(); + } + + size_t length() const { + return crtBuf_->tailroom(); + } + + /** + * Mark n bytes (must be <= length()) as appended, as per the + * IOBuf::append() method. + */ + void append(size_t n) { + crtBuf_->append(n); + } + + /** + * Ensure at least n contiguous bytes available to write. + * Postcondition: length() >= n. + */ + void ensure(uint32_t n) { + if (LIKELY(length() >= n)) { + return; + } + + // Waste the rest of the current buffer and allocate a new one. + // Don't make it too small, either. + if (growth_ == 0) { + throw std::out_of_range("can't grow buffer chain"); + } + + n = std::max(n, growth_); + buffer_->prependChain(IOBuf::create(n)); + crtBuf_ = buffer_->prev(); + } + + size_t pushAtMost(const uint8_t* buf, size_t len) { + size_t copied = 0; + for (;;) { + // Fast path: it all fits in one buffer. + size_t available = length(); + if (LIKELY(available >= len)) { + memcpy(writableData(), buf, len); + append(len); + return copied + len; + } + + memcpy(writableData(), buf, available); + append(available); + copied += available; + if (UNLIKELY(!tryGrowChain())) { + return copied; + } + buf += available; + len -= available; + } + } + + private: + bool tryGrowChain() { + assert(crtBuf_->next() == buffer_); + if (growth_ == 0) { + return false; + } + + buffer_->prependChain(IOBuf::create(growth_)); + crtBuf_ = buffer_->prev(); + return true; + } + + IOBuf* buffer_; + IOBuf* crtBuf_; + uint32_t growth_; +}; + +}} // folly::io + +#endif // FOLLY_CURSOR_H diff --git a/folly/experimental/io/IOBuf.cpp b/folly/experimental/io/IOBuf.cpp new file mode 100644 index 00000000..16c4e551 --- /dev/null +++ b/folly/experimental/io/IOBuf.cpp @@ -0,0 +1,600 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
diff --git a/folly/experimental/io/IOBuf.cpp b/folly/experimental/io/IOBuf.cpp
new file mode 100644
index 00000000..16c4e551
--- /dev/null
+++ b/folly/experimental/io/IOBuf.cpp
@@ -0,0 +1,600 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define __STDC_LIMIT_MACROS
+
+#include "folly/experimental/io/IOBuf.h"
+
+#include "folly/Malloc.h"
+#include "folly/Likely.h"
+
+#include <stdexcept>
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+using std::unique_ptr;
+
+namespace folly {
+
+const uint32_t IOBuf::kMaxIOBufSize;
+// Note: Applying offsetof() to an IOBuf is legal according to C++11, since
+// IOBuf is a standard-layout class.  However, this isn't legal with earlier
+// C++ standards, which require that offsetof() only be used with POD types.
+//
+// This code compiles with g++ 4.6, but not with g++ 4.4 or earlier versions.
+const uint32_t IOBuf::kMaxInternalDataSize =
+  kMaxIOBufSize - offsetof(folly::IOBuf, int_.buf);
+
+IOBuf::SharedInfo::SharedInfo()
+  : freeFn(NULL),
+    userData(NULL) {
+  // Use relaxed memory ordering here.  Since we are creating a new
+  // SharedInfo, no other threads should be referring to it yet.
+  refcount.store(1, std::memory_order_relaxed);
+}
+
+IOBuf::SharedInfo::SharedInfo(FreeFunction fn, void* arg)
+  : freeFn(fn),
+    userData(arg) {
+  // Use relaxed memory ordering here.  Since we are creating a new
+  // SharedInfo, no other threads should be referring to it yet.
+  refcount.store(1, std::memory_order_relaxed);
+}
+
+void* IOBuf::operator new(size_t size) {
+  // Since IOBuf::create() manually allocates space for some IOBuf objects
+  // using malloc(), override operator new so that all IOBuf objects are
+  // always allocated using malloc().  This way operator delete can always
+  // know that free() is the correct way to deallocate the memory.
+  void* ptr = malloc(size);
+
+  // operator new is not allowed to return NULL
+  if (UNLIKELY(ptr == NULL)) {
+    throw std::bad_alloc();
+  }
+
+  return ptr;
+}
+
+void* IOBuf::operator new(size_t size, void* ptr) {
+  assert(size <= kMaxIOBufSize);
+  return ptr;
+}
+
+void IOBuf::operator delete(void* ptr) {
+  // For small buffers, IOBuf::create() manually allocates the space for the
+  // IOBuf object using malloc().  Therefore we override delete to ensure
+  // that the IOBuf space is freed using free() rather than a normal delete.
+  free(ptr);
+}
+
+unique_ptr<IOBuf> IOBuf::create(uint32_t capacity) {
+  // If the desired capacity is less than kMaxInternalDataSize,
+  // just allocate a single region large enough for both the IOBuf header and
+  // the data.
+  if (capacity <= kMaxInternalDataSize) {
+    void* buf = malloc(kMaxIOBufSize);
+    if (UNLIKELY(buf == NULL)) {
+      throw std::bad_alloc();
+    }
+
+    uint8_t* bufEnd = static_cast<uint8_t*>(buf) + kMaxIOBufSize;
+    unique_ptr<IOBuf> iobuf(new(buf) IOBuf(bufEnd));
+    assert(iobuf->capacity() >= capacity);
+    return iobuf;
+  }
+
+  // Allocate an external buffer
+  uint8_t* buf;
+  SharedInfo* sharedInfo;
+  uint32_t actualCapacity;
+  allocExtBuffer(capacity, &buf, &sharedInfo, &actualCapacity);
+
+  // Allocate the IOBuf header
+  try {
+    return unique_ptr<IOBuf>(new IOBuf(kExtAllocated, 0,
+                                       buf, actualCapacity,
+                                       buf, 0,
+                                       sharedInfo));
+  } catch (...) {
+    free(buf);
+    throw;
+  }
+}
+
+unique_ptr<IOBuf> IOBuf::takeOwnership(void* buf, uint32_t capacity,
+                                       FreeFunction freeFn,
+                                       void* userData,
+                                       bool freeOnError) {
+  SharedInfo* sharedInfo = NULL;
+  try {
+    sharedInfo = new SharedInfo(freeFn, userData);
+
+    uint8_t* bufPtr = static_cast<uint8_t*>(buf);
+    return unique_ptr<IOBuf>(new IOBuf(kExtUserSupplied, kFlagFreeSharedInfo,
+                                       bufPtr, capacity,
+                                       bufPtr, capacity,
+                                       sharedInfo));
+  } catch (...) {
+    delete sharedInfo;
+    if (freeOnError) {
+      if (freeFn) {
+        try {
+          freeFn(buf, userData);
+        } catch (...) {
+          // The user's free function is not allowed to throw.
+          abort();
+        }
+      } else {
+        free(buf);
+      }
+    }
+    throw;
+  }
+}
+
+unique_ptr<IOBuf> IOBuf::wrapBuffer(const void* buf, uint32_t capacity) {
+  // We cast away the const-ness of the buffer here.
+  // This is okay since IOBuf users must use unshare() to create a copy of
+  // this buffer before writing to the buffer.
+  uint8_t* bufPtr = static_cast<uint8_t*>(const_cast<void*>(buf));
+  return unique_ptr<IOBuf>(new IOBuf(kExtUserSupplied, kFlagUserOwned,
+                                     bufPtr, capacity,
+                                     bufPtr, capacity,
+                                     NULL));
+}
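As a usage sketch (names invented for illustration, not part of the diff): takeOwnership() hands an existing heap buffer to an IOBuf with a custom deleter, while wrapBuffer() leaves ownership with the caller.

    #include "folly/experimental/io/IOBuf.h"

    using folly::IOBuf;

    static void freeArray(void* buf, void* /*userData*/) {
      delete[] static_cast<char*>(buf);  // free functions must not throw
    }

    void ownershipSketch() {
      char* raw = new char[4096];
      // The IOBuf now owns 'raw' and will call freeArray() when the last
      // IOBuf referencing the buffer is destroyed.
      std::unique_ptr<IOBuf> owned =
          IOBuf::takeOwnership(raw, 4096, freeArray);

      static const char kGreeting[] = "hello";
      // wrapBuffer() does not take ownership; kGreeting must outlive the
      // IOBuf, and the buffer is always reported as shared.
      std::unique_ptr<IOBuf> wrapped =
          IOBuf::wrapBuffer(kGreeting, sizeof(kGreeting) - 1);
    }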
+
+IOBuf::IOBuf(uint8_t* end)
+  : next_(this),
+    prev_(this),
+    data_(int_.buf),
+    length_(0),
+    flags_(0) {
+  int_.capacity = end - int_.buf;
+  assert(int_.capacity <= kMaxInternalDataSize);
+}
+
+IOBuf::IOBuf(ExtBufTypeEnum type,
+             uint32_t flags,
+             uint8_t* buf,
+             uint32_t capacity,
+             uint8_t* data,
+             uint32_t length,
+             SharedInfo* sharedInfo)
+  : next_(this),
+    prev_(this),
+    data_(data),
+    length_(length),
+    flags_(kFlagExt | flags) {
+  ext_.capacity = capacity;
+  ext_.type = type;
+  ext_.buf = buf;
+  ext_.sharedInfo = sharedInfo;
+
+  assert(data >= buf);
+  assert(data + length <= buf + capacity);
+  assert(static_cast<bool>(flags & kFlagUserOwned) ==
+         (sharedInfo == NULL));
+}
+
+IOBuf::~IOBuf() {
+  // Destroying an IOBuf destroys the entire chain.
+  // Users of IOBuf should only explicitly delete the head of any chain.
+  // The other elements in the chain will be automatically destroyed.
+  while (next_ != this) {
+    // Since unlink() returns unique_ptr() and we don't store it,
+    // it will automatically delete the unlinked element.
+    (void)next_->unlink();
+  }
+
+  if (flags_ & kFlagExt) {
+    decrementRefcount();
+  }
+}
+
+bool IOBuf::empty() const {
+  const IOBuf* current = this;
+  do {
+    if (current->length() != 0) {
+      return false;
+    }
+    current = current->next_;
+  } while (current != this);
+  return true;
+}
+
+uint32_t IOBuf::countChainElements() const {
+  uint32_t numElements = 1;
+  for (IOBuf* current = next_; current != this; current = current->next_) {
+    ++numElements;
+  }
+  return numElements;
+}
+
+uint64_t IOBuf::computeChainDataLength() const {
+  uint64_t fullLength = length_;
+  for (IOBuf* current = next_; current != this; current = current->next_) {
+    fullLength += current->length_;
+  }
+  return fullLength;
+}
+
+void IOBuf::prependChain(unique_ptr<IOBuf>&& iobuf) {
+  // Take ownership of the specified IOBuf
+  IOBuf* other = iobuf.release();
+
+  // Remember the pointer to the tail of the other chain
+  IOBuf* otherTail = other->prev_;
+
+  // Hook up prev_->next_ to point at the start of the other chain,
+  // and other->prev_ to point at prev_
+  prev_->next_ = other;
+  other->prev_ = prev_;
+
+  // Hook up otherTail->next_ to point at us,
+  // and prev_ to point back at otherTail,
+  otherTail->next_ = this;
+  prev_ = otherTail;
+}
+
+unique_ptr<IOBuf> IOBuf::clone() const {
+  unique_ptr<IOBuf> newHead(cloneOne());
+
+  for (IOBuf* current = next_; current != this; current = current->next_) {
+    newHead->prependChain(current->cloneOne());
+  }
+
+  return newHead;
+}
+
+unique_ptr<IOBuf> IOBuf::cloneOne() const {
+  if (flags_ & kFlagExt) {
+    unique_ptr<IOBuf> iobuf(new IOBuf(static_cast<ExtBufTypeEnum>(ext_.type),
+                                      flags_, ext_.buf, ext_.capacity,
+                                      data_, length_,
+                                      ext_.sharedInfo));
+    if (ext_.sharedInfo) {
+      ext_.sharedInfo->refcount.fetch_add(1, std::memory_order_acq_rel);
+    }
+    return iobuf;
+  } else {
+    // We have an internal data buffer that cannot be shared
+    // Allocate a new IOBuf and copy the data into it.
+    unique_ptr<IOBuf> iobuf(IOBuf::create(int_.capacity));
+    assert((iobuf->flags_ & kFlagExt) == 0);
+    iobuf->data_ += headroom();
+    memcpy(iobuf->data_, data_, length_);
+    iobuf->length_ = length_;
+    return iobuf;
+  }
+}
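To illustrate the sharing semantics implemented above (a sketch with made-up sizes; not part of the diff): clone() bumps the buffer refcount rather than copying, and unshare() performs the actual copy only when the buffer is shared.

    #include <cassert>
    #include "folly/experimental/io/IOBuf.h"

    using folly::IOBuf;

    void cloneSketch() {
      std::unique_ptr<IOBuf> buf = IOBuf::create(1024);
      buf->append(16);  // mark 16 bytes as valid (contents left unset here)

      std::unique_ptr<IOBuf> copy = buf->clone();
      // Both IOBufs now point at the same underlying buffer...
      assert(buf->isShared());

      // ...until one of them needs to write.  unshare() copies the data so
      // that this IOBuf has a unique, writable buffer.
      copy->unshare();
      assert(!copy->isSharedOne());
    }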
+
+void IOBuf::unshareOneSlow() {
+  // Internal buffers are always unshared, so unshareOneSlow() can only be
+  // called for external buffers
+  assert(flags_ & kFlagExt);
+
+  // Allocate a new buffer for the data
+  uint8_t* buf;
+  SharedInfo* sharedInfo;
+  uint32_t actualCapacity;
+  allocExtBuffer(ext_.capacity, &buf, &sharedInfo, &actualCapacity);
+
+  // Copy the data
+  // Maintain the same amount of headroom.  Since we maintained the same
+  // minimum capacity we also maintain at least the same amount of tailroom.
+  uint32_t headlen = headroom();
+  memcpy(buf + headlen, data_, length_);
+
+  // Release our reference on the old buffer
+  decrementRefcount();
+  // Make sure kFlagExt is set, and kFlagUserOwned and kFlagFreeSharedInfo
+  // are not set.
+  flags_ = kFlagExt;
+
+  // Update the buffer pointers to point to the new buffer
+  data_ = buf + headlen;
+  ext_.buf = buf;
+  ext_.sharedInfo = sharedInfo;
+}
+
+void IOBuf::unshareChained() {
+  // unshareChained() should only be called if we are part of a chain of
+  // multiple IOBufs.  The caller should have already verified this.
+  assert(isChained());
+
+  IOBuf* current = this;
+  while (true) {
+    if (current->isSharedOne()) {
+      // we have to unshare
+      break;
+    }
+
+    current = current->next_;
+    if (current == this) {
+      // None of the IOBufs in the chain are shared,
+      // so return without doing anything
+      return;
+    }
+  }
+
+  // We have to unshare.  Let coalesceSlow() do the work.
+  coalesceSlow();
+}
+
+void IOBuf::coalesceSlow(size_t maxLength) {
+  // coalesceSlow() should only be called if we are part of a chain of
+  // multiple IOBufs.  The caller should have already verified this.
+  assert(isChained());
+  assert(length_ < maxLength);
+
+  // Compute the length of the entire chain
+  uint64_t newLength = 0;
+  IOBuf* end = this;
+  do {
+    newLength += end->length_;
+    end = end->next_;
+  } while (newLength < maxLength && end != this);
+
+  uint64_t newHeadroom = headroom();
+  uint64_t newTailroom = end->prev_->tailroom();
+  uint64_t newCapacity = newLength + newHeadroom + newTailroom;
+  if (newCapacity > UINT32_MAX) {
+    throw std::overflow_error("IOBuf chain too large to coalesce");
+  }
+
+  // Allocate space for the coalesced buffer.
+  // We always convert to an external buffer, even if we happened to be an
+  // internal buffer before.
+  uint8_t* newBuf;
+  SharedInfo* newInfo;
+  uint32_t actualCapacity;
+  allocExtBuffer(newCapacity, &newBuf, &newInfo, &actualCapacity);
+
+  // Copy the data into the new buffer
+  uint8_t* newData = newBuf + newHeadroom;
+  uint8_t* p = newData;
+  IOBuf* current = this;
+  do {
+    memcpy(p, current->data_, current->length_);
+    p += current->length_;
+    current = current->next_;
+  } while (current != end);
+
+  // Point at the new buffer
+  if (flags_ & kFlagExt) {
+    decrementRefcount();
+  }
+
+  // Make sure kFlagExt is set, and kFlagUserOwned and kFlagFreeSharedInfo
+  // are not set.
+  flags_ = kFlagExt;
+
+  ext_.capacity = actualCapacity;
+  ext_.type = kExtAllocated;
+  ext_.buf = newBuf;
+  ext_.sharedInfo = newInfo;
+  data_ = newData;
+  length_ = newLength;
+
+  // Separate from the rest of our chain.
+  // Since we don't store the unique_ptr returned by separateChain(),
+  // this will immediately delete the returned subchain.
+  (void)separateChain(next_, end->prev_);
+
+  // We should be the only element left in the chain now
+  assert(length_ >= maxLength || !isChained());
+}
+
+void IOBuf::decrementRefcount() {
+  assert(flags_ & kFlagExt);
+
+  // Externally owned buffers don't have a SharedInfo object and aren't
+  // managed by the reference count
+  if (flags_ & kFlagUserOwned) {
+    assert(ext_.sharedInfo == NULL);
+    return;
+  }
+
+  // Decrement the refcount
+  uint32_t newcnt = ext_.sharedInfo->refcount.fetch_sub(
+      1, std::memory_order_acq_rel);
+  // Note that fetch_sub() returns the value before we decremented.
+  // If it is 1, we were the only remaining user; if it is greater there are
+  // still other users.
+  if (newcnt > 1) {
+    return;
+  }
+
+  // We were the last user.  Free the buffer
+  if (ext_.sharedInfo->freeFn != NULL) {
+    try {
+      ext_.sharedInfo->freeFn(ext_.buf, ext_.sharedInfo->userData);
+    } catch (...) {
+      // The user's free function should never throw.  Otherwise we might
+      // throw from the IOBuf destructor.  Other code paths like coalesce()
+      // also assume that decrementRefcount() cannot throw.
+      abort();
+    }
+  } else {
+    free(ext_.buf);
+  }
+
+  // Free the SharedInfo if it was allocated separately.
+  //
+  // This is only used by takeOwnership().
+  //
+  // To avoid this special case handling in decrementRefcount(), we could
+  // have takeOwnership() set a custom freeFn() that calls the user's free
+  // function then frees the SharedInfo object.  (This would require that
+  // takeOwnership() store the user's free function with its allocated
+  // SharedInfo object.)  However, handling this specially with a flag seems
+  // like it shouldn't be problematic.
+  if (flags_ & kFlagFreeSharedInfo) {
+    delete ext_.sharedInfo;
+  }
+}
+
+void IOBuf::reserveSlow(uint32_t minHeadroom, uint32_t minTailroom) {
+  size_t newCapacity = (size_t)length_ + minHeadroom + minTailroom;
+  CHECK_LT(newCapacity, UINT32_MAX);
+
+  // We'll need to reallocate the buffer.
+  // There are a few options.
+  // - If we have enough total room, move the data around in the buffer
+  //   and adjust the data_ pointer.
+  // - If we're using an internal buffer, we'll switch to an external
+  //   buffer with enough headroom and tailroom.
+  // - If we have enough headroom (headroom() >= minHeadroom) but not too
+  //   much (so we don't waste memory), we can try one of two things,
+  //   depending on whether we use jemalloc or not:
+  //   - If using jemalloc, we can try to expand in place, avoiding a
+  //     memcpy()
+  //   - If not using jemalloc and we don't have too much to copy,
+  //     we'll use realloc() (note that realloc might have to copy
+  //     headroom + data + tailroom, see smartRealloc in folly/Malloc.h)
+  // - Otherwise, bite the bullet and reallocate.
+  if (headroom() + tailroom() >= minHeadroom + minTailroom) {
+    uint8_t* newData = writableBuffer() + minHeadroom;
+    memmove(newData, data_, length_);
+    data_ = newData;
+    return;
+  }
+
+  size_t newAllocatedCapacity = goodExtBufferSize(newCapacity);
+  uint8_t* newBuffer = nullptr;
+  uint32_t newHeadroom = 0;
+  uint32_t oldHeadroom = headroom();
+
+  if ((flags_ & kFlagExt) && length_ != 0 && oldHeadroom >= minHeadroom) {
+    if (usingJEMalloc()) {
+      size_t headSlack = oldHeadroom - minHeadroom;
+      // We assume that tailroom is more useful and more important than
+      // headroom (not least because realloc / rallocm allow us to grow the
+      // buffer at the tail, but not at the head)  So, if we have more
+      // headroom than we need, we consider that "wasted".  We arbitrarily
+      // define "too much" headroom to be 25% of the capacity.
+      if (headSlack * 4 <= newCapacity) {
+        size_t allocatedCapacity = capacity() + sizeof(SharedInfo);
+        void* p = ext_.buf;
+        if (allocatedCapacity >= jemallocMinInPlaceExpandable) {
+          int r = rallocm(&p, &newAllocatedCapacity, newAllocatedCapacity,
+                          0, ALLOCM_NO_MOVE);
+          if (r == ALLOCM_SUCCESS) {
+            newBuffer = static_cast<uint8_t*>(p);
+            newHeadroom = oldHeadroom;
+          } else if (r == ALLOCM_ERR_OOM) {
+            // shouldn't happen as we don't actually allocate new memory
+            // (due to ALLOCM_NO_MOVE)
+            throw std::bad_alloc();
+          }
+          // if ALLOCM_ERR_NOT_MOVED, do nothing, fall back to
+          // malloc/memcpy/free
+        }
+      }
+    } else {  // Not using jemalloc
+      size_t copySlack = capacity() - length_;
+      if (copySlack * 2 <= length_) {
+        void* p = realloc(ext_.buf, newAllocatedCapacity);
+        if (UNLIKELY(p == nullptr)) {
+          throw std::bad_alloc();
+        }
+        newBuffer = static_cast<uint8_t*>(p);
+        newHeadroom = oldHeadroom;
+      }
+    }
+  }
+
+  // None of the previous reallocation strategies worked (or we're using
+  // an internal buffer).  malloc/copy/free.
+  if (newBuffer == nullptr) {
+    void* p = malloc(newAllocatedCapacity);
+    if (UNLIKELY(p == nullptr)) {
+      throw std::bad_alloc();
+    }
+    newBuffer = static_cast<uint8_t*>(p);
+    memcpy(newBuffer + minHeadroom, data_, length_);
+    if (flags_ & kFlagExt) {
+      free(ext_.buf);
+    }
+    newHeadroom = minHeadroom;
+  }
+
+  SharedInfo* info;
+  uint32_t cap;
+  initExtBuffer(newBuffer, newAllocatedCapacity, &info, &cap);
+
+  flags_ = kFlagExt;
+
+  ext_.capacity = cap;
+  ext_.type = kExtAllocated;
+  ext_.buf = newBuffer;
+  ext_.sharedInfo = info;
+  data_ = newBuffer + newHeadroom;
+  // length_ is unchanged
+}
+
+void IOBuf::allocExtBuffer(uint32_t minCapacity,
+                           uint8_t** bufReturn,
+                           SharedInfo** infoReturn,
+                           uint32_t* capacityReturn) {
+  size_t mallocSize = goodExtBufferSize(minCapacity);
+  uint8_t* buf = static_cast<uint8_t*>(malloc(mallocSize));
+  if (UNLIKELY(buf == NULL)) {
+    throw std::bad_alloc();
+  }
+  initExtBuffer(buf, mallocSize, infoReturn, capacityReturn);
+  *bufReturn = buf;
+}
+
+size_t IOBuf::goodExtBufferSize(uint32_t minCapacity) {
+  // Determine how much space we should allocate.  We'll store the SharedInfo
+  // for the external buffer just after the buffer itself.  (We store it just
+  // after the buffer rather than just before so that the code can still just
+  // use free(ext_.buf) to free the buffer.)
+  size_t minSize = static_cast<size_t>(minCapacity) + sizeof(SharedInfo);
+  // Add room for padding so that the SharedInfo will be aligned on an 8-byte
+  // boundary.
+  minSize = (minSize + 7) & ~7;
+
+  // Use goodMallocSize() to bump up the capacity to a decent size to request
+  // from malloc, so we can use all of the space that malloc will probably
+  // give us anyway.
+  return goodMallocSize(minSize);
+}
+
+void IOBuf::initExtBuffer(uint8_t* buf, size_t mallocSize,
+                          SharedInfo** infoReturn,
+                          uint32_t* capacityReturn) {
+  // Find the SharedInfo storage at the end of the buffer
+  // and construct the SharedInfo.
+  uint8_t* infoStart = (buf + mallocSize) - sizeof(SharedInfo);
+  SharedInfo* sharedInfo = new(infoStart) SharedInfo;
+
+  size_t actualCapacity = infoStart - buf;
+  // On the unlikely possibility that the actual capacity is larger than can
+  // fit in a uint32_t after adding room for the refcount and calling
+  // goodMallocSize(), truncate downwards if necessary.
+  if (actualCapacity >= UINT32_MAX) {
+    *capacityReturn = UINT32_MAX;
+  } else {
+    *capacityReturn = actualCapacity;
+  }
+
+  *infoReturn = sharedInfo;
+}
+
+}  // folly
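To make the headroom/tailroom machinery concrete, here is an illustrative sketch (values arbitrary, not part of the diff) of the pointer-adjustment methods declared in the header that follows:

    #include "folly/experimental/io/IOBuf.h"

    using folly::IOBuf;

    void layoutSketch() {
      // data() == buffer(), length() == 0, capacity() >= 64
      std::unique_ptr<IOBuf> buf = IOBuf::create(64);

      buf->advance(8);   // shift the data pointer forward: headroom() == 8
      buf->append(16);   // mark 16 bytes as valid data: length() == 16
      buf->prepend(4);   // grow the valid range backwards into the headroom
      buf->trimEnd(2);   // shrink the valid range at the tail

      // Guarantee room for a 20-byte header and a 128-byte trailer,
      // reallocating (reserveSlow) only if the current buffer can't satisfy
      // the request.
      buf->reserve(20, 128);
    }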
diff --git a/folly/experimental/io/IOBuf.h b/folly/experimental/io/IOBuf.h
new file mode 100644
index 00000000..9b439de3
--- /dev/null
+++ b/folly/experimental/io/IOBuf.h
@@ -0,0 +1,972 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_IO_IOBUF_H_
+#define FOLLY_IO_IOBUF_H_
+
+#include <glog/logging.h>
+#include <atomic>
+#include <cassert>
+#include <cinttypes>
+#include <cstddef>
+#include <cstring>
+#include <limits>
+#include <memory>
+
+namespace folly {
+
+/**
+ * An IOBuf is a pointer to a buffer of data.
+ *
+ * IOBuf objects are intended to be used primarily for networking code, and
+ * are modelled somewhat after FreeBSD's mbuf data structure and Linux's
+ * sk_buff structure.
+ *
+ * IOBuf objects facilitate zero-copy network programming, by allowing
+ * multiple IOBuf objects to point to the same underlying buffer of data,
+ * using a reference count to track when the buffer is no longer needed and
+ * can be freed.
+ *
+ *
+ * Data Layout
+ * -----------
+ *
+ * The IOBuf itself is a small object containing a pointer to the buffer and
+ * information about which segment of the buffer contains valid data.
+ *
+ * The data layout looks like this:
+ *
+ *      +-------+
+ *      | IOBuf |
+ *      +-------+
+ *       /
+ *      |
+ *      v
+ *  +------------+--------------------+-----------+
+ *  | headroom   |        data        |  tailroom |
+ *  +------------+--------------------+-----------+
+ *  ^            ^                    ^           ^
+ *  buffer()   data()               tail()      bufferEnd()
+ *
+ * The length() method returns the length of the valid data; capacity()
+ * returns the entire capacity of the buffer (from buffer() to bufferEnd()).
+ * The headroom() and tailroom() methods return the amount of unused capacity
+ * available before and after the data.
+ *
+ *
+ * Buffer Sharing
+ * --------------
+ *
+ * The buffer itself is reference counted, and multiple IOBuf objects may
+ * point to the same buffer.  Each IOBuf may point to a different section of
+ * valid data within the underlying buffer.  For example, if multiple
+ * protocol requests are read from the network into a single buffer, a
+ * separate IOBuf may be created for each request, all sharing the same
+ * underlying buffer.
+ *
+ * In other words, when multiple IOBufs share the same underlying buffer, the
+ * data() and tail() methods on each IOBuf may point to a different segment
+ * of the data.  However, the buffer() and bufferEnd() methods will point to
+ * the same location for all IOBufs sharing the same underlying buffer.
+ *
+ *       +-----------+     +---------+
+ *       |  IOBuf 1  |     | IOBuf 2 |
+ *       +-----------+     +---------+
+ *        |         | _____/        |
+ *   data |    tail |/    data      | tail
+ *        v         v               v
+ *  +-------------------------------------+
+ *  |     |         |               |     |
+ *  +-------------------------------------+
+ *
+ * If you only read data from an IOBuf, you don't need to worry about other
+ * IOBuf objects possibly sharing the same underlying buffer.  However, if
+ * you ever write to the buffer you need to first ensure that no other
+ * IOBufs point to the same buffer.  The unshare() method may be used to
+ * ensure that you have an unshared buffer.
+ *
+ *
+ * IOBuf Chains
+ * ------------
+ *
+ * IOBuf objects also contain pointers to next and previous IOBuf objects.
+ * This can be used to represent a single logical piece of data that is
+ * stored in non-contiguous chunks in separate buffers.
+ *
+ * A single IOBuf object can only belong to one chain at a time.
+ *
+ * IOBuf chains are always circular.  The "prev" pointer in the head of the
+ * chain points to the tail of the chain.  However, it is up to the user to
+ * decide which IOBuf is the head.  Internally the IOBuf code does not care
+ * which element is the head.
+ *
+ * The lifetimes of all IOBufs in the chain are linked: when one element in
+ * the chain is deleted, all other chained elements are also deleted.
+ * Conceptually it is simplest to treat this as if the head of the chain owns
+ * all other IOBufs in the chain.  When you delete the head of the chain, it
+ * will delete the other elements as well.
+ * For this reason, prependChain() and appendChain() take ownership of the
+ * new elements being added to this chain.
+ *
+ * When the coalesce() method is used to coalesce an entire IOBuf chain into
+ * a single IOBuf, all other IOBufs in the chain are eliminated and
+ * automatically deleted.  The unshare() method may coalesce the chain; if it
+ * does it will similarly delete all IOBufs eliminated from the chain.
+ *
+ * As discussed in the following section, it is up to the user to maintain a
+ * lock around the entire IOBuf chain if multiple threads need to access the
+ * chain.  IOBuf does not provide any internal locking.
+ *
+ *
+ * Synchronization
+ * ---------------
+ *
+ * When used in multithreaded programs, a single IOBuf object should only be
+ * used in a single thread at a time.  If a caller uses a single IOBuf across
+ * multiple threads the caller is responsible for using an external lock to
+ * synchronize access to the IOBuf.
+ *
+ * Two separate IOBuf objects may be accessed concurrently in separate
+ * threads without locking, even if they point to the same underlying buffer.
+ * The buffer reference count is always accessed atomically, and no other
+ * operations should affect other IOBufs that point to the same data segment.
+ * The caller is responsible for using unshare() to ensure that the data
+ * buffer is not shared by other IOBufs before writing to it, and this
+ * ensures that the data itself is not modified in one thread while also
+ * being accessed from another thread.
+ *
+ * For IOBuf chains, no two IOBufs in the same chain should be accessed
+ * simultaneously in separate threads.  The caller must maintain a lock
+ * around the entire chain if the chain, or individual IOBufs in the chain,
+ * may be accessed by multiple threads.
+ *
+ *
+ * IOBuf Object Allocation/Sharing
+ * -------------------------------
+ *
+ * IOBuf objects themselves are always allocated on the heap.  The IOBuf
+ * constructors are private, so IOBuf objects may not be created on the
+ * stack.  In part this is done since some IOBuf objects use small-buffer
+ * optimization and contain the buffer data immediately after the IOBuf
+ * object itself.  The coalesce() and unshare() methods also expect to be
+ * able to delete subsequent IOBuf objects in the chain if they are no longer
+ * needed due to coalescing.
+ *
+ * The IOBuf structure also does not provide room for an intrusive refcount
+ * on the IOBuf object itself, only the underlying data buffer is reference
+ * counted.  If users want to share the same IOBuf object between multiple
+ * parts of the code, they are responsible for managing this sharing on
+ * their own.  (For example, by using a shared_ptr.  Alternatively, users
+ * always have the option of using clone() to create a second IOBuf that
+ * points to the same underlying buffer.)
+ *
+ * With jemalloc, allocating small objects like IOBuf objects should be
+ * relatively fast, and the cost of allocating IOBuf objects on the heap and
+ * cloning new IOBufs should be relatively cheap.
+ */
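Before the class definition, a small illustrative sketch (not from the patch; the function name is invented) of the chain-ownership rules just described: the head owns the chain, and prependChain() takes ownership via an rvalue unique_ptr.

    #include <cassert>
    #include "folly/experimental/io/IOBuf.h"

    using folly::IOBuf;

    void chainOwnershipSketch() {
      std::unique_ptr<IOBuf> head = IOBuf::create(128);
      std::unique_ptr<IOBuf> tail = IOBuf::create(128);

      // prependChain() takes an rvalue unique_ptr: ownership of 'tail'
      // moves into the chain headed by 'head'.
      head->prependChain(std::move(tail));
      assert(head->isChained());
      assert(head->countChainElements() == 2);

      // Destroying 'head' (e.g., when it goes out of scope) destroys every
      // IOBuf in the chain.
    }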
+class IOBuf {
+ public:
+  typedef void (*FreeFunction)(void* buf, void* userData);
+
+  /**
+   * Allocate a new IOBuf object with the requested capacity.
+   *
+   * Returns a new IOBuf object that must be (eventually) deleted by the
+   * caller.  The returned IOBuf may actually have slightly more capacity
+   * than requested.
+   *
+   * The data pointer will initially point to the start of the newly
+   * allocated buffer, and will have a data length of 0.
+   *
+   * Throws std::bad_alloc on error.
+   */
+  static std::unique_ptr<IOBuf> create(uint32_t capacity);
+
+  /**
+   * Create a new IOBuf pointing to an existing data buffer.
+   *
+   * The new IOBuf will assume ownership of the buffer, and free it by
+   * calling the specified FreeFunction when the last IOBuf pointing to this
+   * buffer is destroyed.  The function will be called with a pointer to the
+   * buffer as the first argument, and the supplied userData value as the
+   * second argument.  The free function must never throw exceptions.
+   *
+   * If no FreeFunction is specified, the buffer will be freed using free().
+   *
+   * The IOBuf data pointer will initially point to the start of the buffer,
+   * and the length will be the full capacity of the buffer.
+   *
+   * On error, std::bad_alloc will be thrown.  If freeOnError is true (the
+   * default) the buffer will be freed before throwing the error.
+   */
+  static std::unique_ptr<IOBuf> takeOwnership(void* buf, uint32_t capacity,
+                                              FreeFunction freeFn = NULL,
+                                              void* userData = NULL,
+                                              bool freeOnError = true);
+
+  /**
+   * Create a new IOBuf object that points to an existing user-owned buffer.
+   *
+   * This should only be used when the caller knows the lifetime of the IOBuf
+   * object ahead of time and can ensure that all IOBuf objects that will
+   * point to this buffer will be destroyed before the buffer itself is
+   * destroyed.
+   *
+   * This buffer will not be freed automatically when the last IOBuf
+   * referencing it is destroyed.  It is the caller's responsibility to free
+   * the buffer after the last IOBuf has been destroyed.
+   *
+   * The IOBuf data pointer will initially point to the start of the buffer,
+   * and the length will be the full capacity of the buffer.
+   *
+   * An IOBuf created using wrapBuffer() will always be reported as shared.
+   * unshare() may be used to create a writable copy of the buffer.
+   *
+   * On error, std::bad_alloc will be thrown.
+   */
+  static std::unique_ptr<IOBuf> wrapBuffer(const void* buf,
+                                           uint32_t capacity);
+
+  /**
+   * Convenience function to create a new IOBuf object that copies data from
+   * a user-supplied buffer, optionally allocating a given amount of
+   * headroom and tailroom.
+   */
+  static std::unique_ptr<IOBuf> copyBuffer(const void* buf, uint32_t size,
+                                           uint32_t headroom=0,
+                                           uint32_t minTailroom=0);
+
+  /**
+   * Convenience function to free a chain of IOBufs held by a unique_ptr.
+   */
+  static void destroy(std::unique_ptr<IOBuf>&& data) {
+    auto destroyer = std::move(data);
+  }
+
+  /**
+   * Destroy this IOBuf.
+   *
+   * Deleting an IOBuf will automatically destroy all IOBufs in the chain.
+   * (See the comments above regarding the ownership model of IOBuf chains.
+   * All subsequent IOBufs in the chain are considered to be owned by the
+   * head of the chain.  Users should only explicitly delete the head of a
+   * chain.)
+   *
+   * When each individual IOBuf is destroyed, it will release its reference
+   * count on the underlying buffer.  If it was the last user of the buffer,
+   * the buffer will be freed.
+   */
+  ~IOBuf();
+
+  /**
+   * Check whether the chain is empty (i.e., whether the IOBufs in the
+   * chain have a total data length of zero).
+   *
+   * This method is semantically equivalent to
+   *   i->computeChainDataLength() == 0
+   * but may run faster because it can short-circuit as soon as it
+   * encounters a buffer with length() != 0
+   */
+  bool empty() const;
+
+  /**
+   * Get the pointer to the start of the data.
+   */
+  const uint8_t* data() const {
+    return data_;
+  }
+
+  /**
+   * Get a writable pointer to the start of the data.
+ * + * The caller is responsible for calling unshare() first to ensure that it is + * actually safe to write to the buffer. + */ + uint8_t* writableData() { + return data_; + } + + /** + * Get the pointer to the end of the data. + */ + const uint8_t* tail() const { + return data_ + length_; + } + + /** + * Get a writable pointer to the end of the data. + * + * The caller is responsible for calling unshare() first to ensure that it is + * actually safe to write to the buffer. + */ + uint8_t* writableTail() { + return data_ + length_; + } + + /** + * Get the data length. + */ + uint32_t length() const { + return length_; + } + + /** + * Get the amount of head room. + * + * Returns the number of bytes in the buffer before the start of the data. + */ + uint32_t headroom() const { + return data_ - buffer(); + } + + /** + * Get the amount of tail room. + * + * Returns the number of bytes in the buffer after the end of the data. + */ + uint32_t tailroom() const { + return bufferEnd() - tail(); + } + + /** + * Get the pointer to the start of the buffer. + * + * Note that this is the pointer to the very beginning of the usable buffer, + * not the start of valid data within the buffer. Use the data() method to + * get a pointer to the start of the data within the buffer. + */ + const uint8_t* buffer() const { + return (flags_ & kFlagExt) ? ext_.buf : int_.buf; + } + + /** + * Get a writable pointer to the start of the buffer. + * + * The caller is responsible for calling unshare() first to ensure that it is + * actually safe to write to the buffer. + */ + uint8_t* writableBuffer() { + return (flags_ & kFlagExt) ? ext_.buf : int_.buf; + } + + /** + * Get the pointer to the end of the buffer. + * + * Note that this is the pointer to the very end of the usable buffer, + * not the end of valid data within the buffer. Use the tail() method to + * get a pointer to the end of the data within the buffer. + */ + const uint8_t* bufferEnd() const { + return (flags_ & kFlagExt) ? + ext_.buf + ext_.capacity : + int_.buf + int_.capacity; + } + + /** + * Get the total size of the buffer. + * + * This returns the total usable length of the buffer. Use the length() + * method to get the length of the actual valid data in this IOBuf. + */ + uint32_t capacity() const { + return (flags_ & kFlagExt) ? ext_.capacity : int_.capacity; + } + + /** + * Get a pointer to the next IOBuf in this chain. + */ + IOBuf* next() { + return next_; + } + const IOBuf* next() const { + return next_; + } + + /** + * Get a pointer to the previous IOBuf in this chain. + */ + IOBuf* prev() { + return prev_; + } + const IOBuf* prev() const { + return prev_; + } + + /** + * Shift the data forwards in the buffer. + * + * This shifts the data pointer forwards in the buffer to increase the + * headroom. This is commonly used to increase the headroom in a newly + * allocated buffer. + * + * The caller is responsible for ensuring that there is sufficient + * tailroom in the buffer before calling advance(). + * + * If there is a non-zero data length, advance() will use memmove() to shift + * the data forwards in the buffer. In this case, the caller is responsible + * for making sure the buffer is unshared, so it will not affect other IOBufs + * that may be sharing the same underlying buffer. + */ + void advance(uint32_t amount) { + // In debug builds, assert if there is a problem. 
+    assert(amount <= tailroom());
+
+    if (length_ > 0) {
+      memmove(data_ + amount, data_, length_);
+    }
+    data_ += amount;
+  }
+
+  /**
+   * Shift the data backwards in the buffer.
+   *
+   * The caller is responsible for ensuring that there is sufficient headroom
+   * in the buffer before calling retreat().
+   *
+   * If there is a non-zero data length, retreat() will use memmove() to
+   * shift the data backwards in the buffer.  In this case, the caller is
+   * responsible for making sure the buffer is unshared, so it will not
+   * affect other IOBufs that may be sharing the same underlying buffer.
+   */
+  void retreat(uint32_t amount) {
+    // In debug builds, assert if there is a problem.
+    assert(amount <= headroom());
+
+    if (length_ > 0) {
+      memmove(data_ - amount, data_, length_);
+    }
+    data_ -= amount;
+  }
+
+  /**
+   * Adjust the data pointer to include more valid data at the beginning.
+   *
+   * This moves the data pointer backwards to include more of the available
+   * buffer.  The caller is responsible for ensuring that there is sufficient
+   * headroom for the new data.  The caller is also responsible for
+   * populating this section with valid data.
+   *
+   * This does not modify any actual data in the buffer.
+   */
+  void prepend(uint32_t amount) {
+    CHECK(amount <= headroom());
+    data_ -= amount;
+    length_ += amount;
+  }
+
+  /**
+   * Adjust the tail pointer to include more valid data at the end.
+   *
+   * This moves the tail pointer forwards to include more of the available
+   * buffer.  The caller is responsible for ensuring that there is sufficient
+   * tailroom for the new data.  The caller is also responsible for
+   * populating this section with valid data.
+   *
+   * This does not modify any actual data in the buffer.
+   */
+  void append(uint32_t amount) {
+    CHECK(amount <= tailroom());
+    length_ += amount;
+  }
+
+  /**
+   * Adjust the data pointer forwards to include less valid data.
+   *
+   * This moves the data pointer forwards so that the first amount bytes are
+   * no longer considered valid data.  The caller is responsible for ensuring
+   * that amount is less than or equal to the actual data length.
+   *
+   * This does not modify any actual data in the buffer.
+   */
+  void trimStart(uint32_t amount) {
+    CHECK(amount <= length_);
+    data_ += amount;
+    length_ -= amount;
+  }
+
+  /**
+   * Adjust the tail pointer backwards to include less valid data.
+   *
+   * This moves the tail pointer backwards so that the last amount bytes are
+   * no longer considered valid data.  The caller is responsible for ensuring
+   * that amount is less than or equal to the actual data length.
+   *
+   * This does not modify any actual data in the buffer.
+   */
+  void trimEnd(uint32_t amount) {
+    CHECK(amount <= length_);
+    length_ -= amount;
+  }
+
+  /**
+   * Clear the buffer.
+   *
+   * Postcondition: headroom() == 0, length() == 0, tailroom() == capacity()
+   */
+  void clear() {
+    data_ = writableBuffer();
+    length_ = 0;
+  }
+
+  /**
+   * Ensure that this buffer has at least minHeadroom headroom bytes and at
+   * least minTailroom tailroom bytes.  The buffer must be writable
+   * (you must call unshare() before this, if necessary).
+   *
+   * Postcondition: headroom() >= minHeadroom, tailroom() >= minTailroom,
+   * the data (between data() and data() + length()) is preserved.
+   */
+  void reserve(uint32_t minHeadroom, uint32_t minTailroom) {
+    // Maybe we don't need to do anything.
+    if (headroom() >= minHeadroom && tailroom() >= minTailroom) {
+      return;
+    }
+    // If the buffer is empty but we have enough total room (head + tail),
+    // move the data_ pointer around.
+    if (length() == 0 &&
+        headroom() + tailroom() >= minHeadroom + minTailroom) {
+      data_ = writableBuffer() + minHeadroom;
+      return;
+    }
+    // Bah, we have to do actual work.
+    reserveSlow(minHeadroom, minTailroom);
+  }
+
+  /**
+   * Return true if this IOBuf is part of a chain of multiple IOBufs, or
+   * false if this is the only IOBuf in its chain.
+   */
+  bool isChained() const {
+    assert((next_ == this) == (prev_ == this));
+    return next_ != this;
+  }
+
+  /**
+   * Get the number of IOBufs in this chain.
+   *
+   * Beware that this method has to walk the entire chain.
+   * Use isChained() if you just want to check if this IOBuf is part of a
+   * chain or not.
+   */
+  uint32_t countChainElements() const;
+
+  /**
+   * Get the length of all the data in this IOBuf chain.
+   *
+   * Beware that this method has to walk the entire chain.
+   */
+  uint64_t computeChainDataLength() const;
+
+  /**
+   * Insert another IOBuf chain immediately before this IOBuf.
+   *
+   * For example, if there are two IOBuf chains (A, B, C) and (D, E, F),
+   * and B->prependChain(D) is called, the (D, E, F) chain will be subsumed
+   * and become part of the chain starting at A, which will now look like
+   * (A, D, E, F, B, C)
+   *
+   * Note that since IOBuf chains are circular, head->prependChain(other) can
+   * be used to append the other chain at the very end of the chain pointed
+   * to by head.  For example, if there are two IOBuf chains (A, B, C) and
+   * (D, E, F), and A->prependChain(D) is called, the chain starting at A
+   * will now consist of (A, B, C, D, E, F)
+   *
+   * The elements in the specified IOBuf chain will become part of this
+   * chain, and will be owned by the head of this chain.  When this chain is
+   * destroyed, all elements in the supplied chain will also be destroyed.
+   *
+   * For this reason, prependChain() only accepts an rvalue-reference to a
+   * unique_ptr(), to make it clear that it is taking ownership of the
+   * supplied chain.  If you have a raw pointer, you can pass in a new
+   * temporary unique_ptr around the raw pointer.  If you have an existing,
+   * non-temporary unique_ptr, you must call std::move(ptr) to make it clear
+   * that you are destroying the original pointer.
+   */
+  void prependChain(std::unique_ptr<IOBuf>&& iobuf);
+
+  /**
+   * Append another IOBuf chain immediately after this IOBuf.
+   *
+   * For example, if there are two IOBuf chains (A, B, C) and (D, E, F),
+   * and B->appendChain(D) is called, the (D, E, F) chain will be subsumed
+   * and become part of the chain starting at A, which will now look like
+   * (A, B, D, E, F, C)
+   *
+   * The elements in the specified IOBuf chain will become part of this
+   * chain, and will be owned by the head of this chain.  When this chain is
+   * destroyed, all elements in the supplied chain will also be destroyed.
+   *
+   * For this reason, appendChain() only accepts an rvalue-reference to a
+   * unique_ptr(), to make it clear that it is taking ownership of the
+   * supplied chain.  If you have a raw pointer, you can pass in a new
+   * temporary unique_ptr around the raw pointer.  If you have an existing,
+   * non-temporary unique_ptr, you must call std::move(ptr) to make it clear
+   * that you are destroying the original pointer.
+   */
+  void appendChain(std::unique_ptr<IOBuf>&& iobuf) {
+    // Just use prependChain() on the next element in our chain
+    next_->prependChain(std::move(iobuf));
+  }
+
+  /**
+   * Remove this IOBuf from its current chain.
+   *
+   * Since ownership of all elements in an IOBuf chain is normally maintained
+   * by the head of the chain, unlink() transfers ownership of this IOBuf
+   * from the chain and gives it to the caller.  A new unique_ptr to the
+   * IOBuf is returned to the caller.  The caller must store the returned
+   * unique_ptr (or call release() on it) to take ownership, otherwise the
+   * IOBuf will be immediately destroyed.
+   *
+   * Since unlink transfers ownership of the IOBuf to the caller, be careful
+   * not to call unlink() on the head of a chain if you already maintain
+   * ownership on the head of the chain via other means.  The pop() method
+   * is a better choice for that situation.
+   */
+  std::unique_ptr<IOBuf> unlink() {
+    next_->prev_ = prev_;
+    prev_->next_ = next_;
+    prev_ = this;
+    next_ = this;
+    return std::unique_ptr<IOBuf>(this);
+  }
+
+  /**
+   * Remove this IOBuf from its current chain and return a unique_ptr to
+   * the IOBuf that formerly followed it in the chain.
+   */
+  std::unique_ptr<IOBuf> pop() {
+    IOBuf *next = next_;
+    next_->prev_ = prev_;
+    prev_->next_ = next_;
+    prev_ = this;
+    next_ = this;
+    return std::unique_ptr<IOBuf>((next == this) ? NULL : next);
+  }
+
+  /**
+   * Remove a subchain from this chain.
+   *
+   * Remove the subchain starting at head and ending at tail from this chain.
+   *
+   * Returns a unique_ptr pointing to head.  (In other words, ownership of
+   * the head of the subchain is transferred to the caller.)  If the caller
+   * ignores the return value and lets the unique_ptr be destroyed, the
+   * subchain will be immediately destroyed.
+   *
+   * The subchain referenced by the specified head and tail must be part of
+   * the same chain as the current IOBuf, but must not contain the current
+   * IOBuf.  However, the specified head and tail may be equal to each other
+   * (i.e., they may be a subchain of length 1).
+   */
+  std::unique_ptr<IOBuf> separateChain(IOBuf* head, IOBuf* tail) {
+    assert(head != this);
+    assert(tail != this);
+
+    head->prev_->next_ = tail->next_;
+    tail->next_->prev_ = head->prev_;
+
+    head->prev_ = tail;
+    tail->next_ = head;
+
+    return std::unique_ptr<IOBuf>(head);
+  }
+
+  /**
+   * Return true if at least one of the IOBufs in this chain is shared,
+   * or false if all of the IOBufs point to unique buffers.
+   *
+   * Use isSharedOne() to only check this IOBuf rather than the entire chain.
+   */
+  bool isShared() const {
+    const IOBuf* current = this;
+    while (true) {
+      if (current->isSharedOne()) {
+        return true;
+      }
+      current = current->next_;
+      if (current == this) {
+        return false;
+      }
+    }
+  }
+
+  /**
+   * Return true if other IOBufs are also pointing to the buffer used by this
+   * IOBuf, and false otherwise.
+   *
+   * If this IOBuf points at a buffer owned by another (non-IOBuf) part of
+   * the code (i.e., if the IOBuf was created using wrapBuffer(), or was
+   * cloned from such an IOBuf), it is always considered shared.
+   *
+   * This only checks the current IOBuf, and not other IOBufs in the chain.
+   */
+  bool isSharedOne() const {
+    // If this is a user-owned buffer, it is always considered shared
+    if (flags_ & kFlagUserOwned) {
+      return true;
+    }
+
+    if (flags_ & kFlagExt) {
+      return ext_.sharedInfo->refcount.load(std::memory_order_acquire) > 1;
+    } else {
+      return false;
+    }
+  }
+
+  /**
+   * Ensure that this IOBuf has a unique buffer that is not shared by other
+   * IOBufs.
+   *
+   * unshare() operates on an entire chain of IOBuf objects.  If the chain is
+   * shared, it may also coalesce the chain when making it unique.
+   * If the chain is coalesced, subsequent IOBuf objects in the current chain
+   * will be automatically deleted.
+   *
+   * Note that buffers owned by other (non-IOBuf) users are automatically
+   * considered shared.
+   *
+   * Throws std::bad_alloc on error.  On error the IOBuf chain will be
+   * unmodified.
+   *
+   * Currently unshare may also throw std::overflow_error if it tries to
+   * coalesce.  (TODO: In the future it would be nice if unshare() were smart
+   * enough not to coalesce the entire buffer if the data is too large.
+   * However, in practice this seems unlikely to become an issue.)
+   */
+  void unshare() {
+    if (isChained()) {
+      unshareChained();
+    } else {
+      unshareOne();
+    }
+  }
+
+  /**
+   * Ensure that this IOBuf has a unique buffer that is not shared by other
+   * IOBufs.
+   *
+   * unshareOne() operates on a single IOBuf object.  This IOBuf will have a
+   * unique buffer after unshareOne() returns, but other IOBufs in the chain
+   * may still be shared after unshareOne() returns.
+   *
+   * Throws std::bad_alloc on error.  On error the IOBuf will be unmodified.
+   */
+  void unshareOne() {
+    if (isSharedOne()) {
+      unshareOneSlow();
+    }
+  }
+
+  /**
+   * Coalesce this IOBuf chain into a single buffer.
+   *
+   * This method moves all of the data in this IOBuf chain into a single
+   * contiguous buffer, if it is not already in one buffer.  After coalesce()
+   * returns, this IOBuf will be a chain of length one.  Other IOBufs in the
+   * chain will be automatically deleted.
+   *
+   * After coalescing, the IOBuf will have at least as much headroom as the
+   * first IOBuf in the chain, and at least as much tailroom as the last
+   * IOBuf in the chain.
+   *
+   * Throws std::bad_alloc on error.  On error the IOBuf chain will be
+   * unmodified.  Throws std::overflow_error if the length of the entire
+   * chain is larger than can be described by a uint32_t capacity.
+   */
+  void coalesce() {
+    if (!isChained()) {
+      return;
+    }
+    coalesceSlow();
+  }
+
+  /**
+   * Ensure that this chain has at least maxLength bytes available as a
+   * contiguous memory range.
+   *
+   * This method coalesces whole buffers in the chain into this buffer as
+   * necessary until this buffer's length() is at least maxLength.
+   *
+   * After coalescing, the IOBuf will have at least as much headroom as the
+   * first IOBuf in the chain, and at least as much tailroom as the last
+   * IOBuf that was coalesced.
+   *
+   * Throws std::bad_alloc on error.  On error the IOBuf chain will be
+   * unmodified.  Throws std::overflow_error if the length of the coalesced
+   * portion of the chain is larger than can be described by a uint32_t
+   * capacity.  (Although maxLength is uint32_t, gather() doesn't split
+   * buffers, so coalescing whole buffers may result in a capacity that can't
+   * be described in uint32_t.)
+   *
+   * Upon return, either enough of the chain was coalesced into a contiguous
+   * region, or the entire chain was coalesced.  That is,
+   * length() >= maxLength || !isChained() is true.
+   */
+  void gather(uint32_t maxLength) {
+    if (!isChained() || length_ >= maxLength) {
+      return;
+    }
+    coalesceSlow(maxLength);
+  }
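A brief sketch (illustrative; buffer contents invented, not part of the diff) of coalesce() and gather() on a two-buffer chain:

    #include <cassert>
    #include "folly/experimental/io/IOBuf.h"

    using folly::IOBuf;

    void coalesceSketch() {
      std::unique_ptr<IOBuf> chain = IOBuf::copyBuffer("head", 4);
      chain->prependChain(IOBuf::copyBuffer("tail", 4));

      // Make sure the first 6 bytes are contiguous; this may merge the
      // second buffer into the first and delete it from the chain.
      chain->gather(6);
      assert(chain->length() >= 6 || !chain->isChained());

      // Flatten the whole chain into one buffer.
      chain->coalesce();
      assert(!chain->isChained());
      assert(chain->computeChainDataLength() == 8);
    }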
+
+  /**
+   * Return a new IOBuf chain sharing the same data as this chain.
+   *
+   * The new IOBuf chain will normally point to the same underlying data
+   * buffers as the original chain.  (The one exception to this is if some of
+   * the IOBufs in this chain contain small internal data buffers which
+   * cannot be shared.)
+   */
+  std::unique_ptr<IOBuf> clone() const;
+
+  /**
+   * Return a new IOBuf with the same data as this IOBuf.
+   *
+   * The new IOBuf returned will not be part of a chain (even if this IOBuf
+   * is part of a larger chain).
+   */
+  std::unique_ptr<IOBuf> cloneOne() const;
+
+  // Overridden operator new and delete.
+  // These directly use malloc() and free() to allocate the space for IOBuf
+  // objects.  This is needed since IOBuf::create() manually uses malloc when
+  // allocating IOBuf objects with an internal buffer.
+  void* operator new(size_t size);
+  void* operator new(size_t size, void* ptr);
+  void operator delete(void* ptr);
+
+ private:
+  enum FlagsEnum {
+    kFlagExt = 0x1,
+    kFlagUserOwned = 0x2,
+    kFlagFreeSharedInfo = 0x4,
+  };
+
+  // Values for the ExternalBuf type field.
+  // We currently don't really use this for anything, other than to have it
+  // around for debugging purposes.  We store it at the moment just because
+  // we have the 4 extra bytes in the ExternalBuf struct that would just be
+  // padding otherwise.
+  enum ExtBufTypeEnum {
+    kExtAllocated = 0,
+    kExtUserSupplied = 1,
+    kExtUserOwned = 2,
+  };
+
+  struct SharedInfo {
+    SharedInfo();
+    SharedInfo(FreeFunction fn, void* arg);
+
+    // A pointer to a function to call to free the buffer when the refcount
+    // hits 0.  If this is NULL, free() will be used instead.
+    FreeFunction freeFn;
+    void* userData;
+    std::atomic<uint32_t> refcount;
+  };
+  struct ExternalBuf {
+    uint32_t capacity;
+    uint32_t type;
+    uint8_t* buf;
+    // SharedInfo may be NULL if kFlagUserOwned is set.  It is non-NULL
+    // in all other cases.
+    SharedInfo* sharedInfo;
+  };
+  struct InternalBuf {
+    uint32_t capacity;
+    uint8_t buf[];
+  };
+
+  // The maximum size for an IOBuf object, including any internal data buffer
+  static const uint32_t kMaxIOBufSize = 256;
+  static const uint32_t kMaxInternalDataSize;
+
+  // Forbidden copy constructor and assignment operator
+  IOBuf(IOBuf const &);
+  IOBuf& operator=(IOBuf const &);
+
+  /**
+   * Create a new IOBuf with internal data.
+   *
+   * end is a pointer to the end of the IOBuf's internal data buffer.
+   */
+  explicit IOBuf(uint8_t* end);
+
+  /**
+   * Create a new IOBuf pointing to an external buffer.
+   *
+   * The caller is responsible for holding a reference count for this new
+   * IOBuf.  The IOBuf constructor does not automatically increment the
+   * reference count.
+   */
+  IOBuf(ExtBufTypeEnum type, uint32_t flags,
+        uint8_t* buf, uint32_t capacity,
+        uint8_t* data, uint32_t length,
+        SharedInfo* sharedInfo);
+
+  void unshareOneSlow();
+  void unshareChained();
+  void coalesceSlow(size_t maxLength=std::numeric_limits<size_t>::max());
+  void decrementRefcount();
+  void reserveSlow(uint32_t minHeadroom, uint32_t minTailroom);
+
+  static size_t goodExtBufferSize(uint32_t minCapacity);
+  static void initExtBuffer(uint8_t* buf, size_t mallocSize,
+                            SharedInfo** infoReturn,
+                            uint32_t* capacityReturn);
+  static void allocExtBuffer(uint32_t minCapacity,
+                             uint8_t** bufReturn,
+                             SharedInfo** infoReturn,
+                             uint32_t* capacityReturn);
+
+  /*
+   * Member variables
+   */
+
+  /*
+   * Links to the next and the previous IOBuf in this chain.
+   *
+   * The chain is circularly linked (the last element in the chain points
+   * back at the head), and next_ and prev_ can never be NULL.  If this IOBuf
+   * is the only element in the chain, next_ and prev_ will both point to
+   * this.
+   */
+  IOBuf* next_;
+  IOBuf* prev_;
+
+  /*
+   * A pointer to the start of the data referenced by this IOBuf, and the
+   * length of the data.
+   *
+   * This may refer to any subsection of the actual buffer capacity.
+   */
+  uint8_t* data_;
+  uint32_t length_;
+  uint32_t flags_;
+
+  union {
+    ExternalBuf ext_;
+    InternalBuf int_;
+  };
+};
+
+inline std::unique_ptr<IOBuf> IOBuf::copyBuffer(
+    const void* data, uint32_t size, uint32_t headroom,
+    uint32_t minTailroom) {
+  uint32_t capacity = headroom + size + minTailroom;
+  std::unique_ptr<IOBuf> buf = create(capacity);
+  buf->advance(headroom);
+  memcpy(buf->writableData(), data, size);
+  buf->append(size);
+  return buf;
+}
+
+}  // folly
+
+#endif // FOLLY_IO_IOBUF_H_
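A short sketch (headroom value and header bytes arbitrary, not part of the diff) of the copyBuffer() convenience above, which pairs naturally with prepend():

    #include <cstring>
    #include "folly/experimental/io/IOBuf.h"

    using folly::IOBuf;

    void copyBufferSketch() {
      const char body[] = "payload";
      // Copy 'body' into a fresh buffer, reserving 16 bytes of headroom
      // for a protocol header to be filled in later.
      std::unique_ptr<IOBuf> buf =
          IOBuf::copyBuffer(body, sizeof(body) - 1, 16);

      buf->prepend(8);  // claim 8 of the headroom bytes as valid data...
      memcpy(buf->writableData(), "HDRHDRHD", 8);  // ...and fill them in
    }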
diff --git a/folly/experimental/io/IOBufQueue.cpp b/folly/experimental/io/IOBufQueue.cpp
new file mode 100644
index 00000000..4b6e5eab
--- /dev/null
+++ b/folly/experimental/io/IOBufQueue.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/experimental/io/IOBufQueue.h"
+
+#include <string.h>
+
+#include <stdexcept>
+
+using std::make_pair;
+using std::pair;
+using std::unique_ptr;
+
+namespace {
+
+using folly::IOBuf;
+
+const size_t MIN_ALLOC_SIZE = 2000;
+const size_t MAX_ALLOC_SIZE = 8000;  // Must fit within a uint32_t
+
+/**
+ * Convenience function to append chain src to chain dst.
+ */
+void
+appendToChain(unique_ptr<IOBuf>& dst, unique_ptr<IOBuf>&& src) {
+  if (dst == NULL) {
+    dst = std::move(src);
+  } else {
+    dst->prev()->appendChain(std::move(src));
+  }
+}
+
+}  // anonymous namespace
+
+namespace folly {
+
+IOBufQueue::IOBufQueue(const Options& options)
+  : options_(options),
+    chainLength_(0) {
+}
+
+IOBufQueue::IOBufQueue(IOBufQueue&& other)
+  : options_(other.options_),
+    chainLength_(other.chainLength_),
+    head_(std::move(other.head_)) {
+  other.chainLength_ = 0;
+}
+
+IOBufQueue& IOBufQueue::operator=(IOBufQueue&& other) {
+  if (&other != this) {
+    options_ = other.options_;
+    chainLength_ = other.chainLength_;
+    head_ = std::move(other.head_);
+    other.chainLength_ = 0;
+  }
+  return *this;
+}
+
+void
+IOBufQueue::append(unique_ptr<IOBuf>&& buf) {
+  if (!buf) {
+    return;
+  }
+  if (options_.cacheChainLength) {
+    chainLength_ += buf->computeChainDataLength();
+  }
+  appendToChain(head_, std::move(buf));
+}
+
+void
+IOBufQueue::append(IOBufQueue& other) {
+  if (!other.head_) {
+    return;
+  }
+  if (options_.cacheChainLength) {
+    if (other.options_.cacheChainLength) {
+      chainLength_ += other.chainLength_;
+    } else {
+      chainLength_ += other.head_->computeChainDataLength();
+    }
+  }
+  appendToChain(head_, std::move(other.head_));
+  other.chainLength_ = 0;
+}
+
+void
+IOBufQueue::append(const void* buf, size_t len) {
+  auto src = static_cast<const uint8_t*>(buf);
+  while (len != 0) {
+    if ((head_ == NULL) || head_->prev()->isSharedOne() ||
+        (head_->prev()->tailroom() == 0)) {
+      appendToChain(head_, std::move(
+          IOBuf::create(std::max(MIN_ALLOC_SIZE,
+                                 std::min(len, MAX_ALLOC_SIZE)))));
+    }
+    IOBuf* last = head_->prev();
+    uint32_t copyLen = std::min(len, (size_t)last->tailroom());
+    memcpy(last->writableTail(), src, copyLen);
+    src += copyLen;
+    last->append(copyLen);
+    if (options_.cacheChainLength) {
+      chainLength_ += copyLen;
+    }
+    len -= copyLen;
+  }
+}
+
+pair<void*, uint32_t>
+IOBufQueue::preallocate(uint32_t min, uint32_t max) {
+  if (head_ != NULL) {
+    // If there's enough space left over at the end of the queue, use that.
+    IOBuf* last = head_->prev();
+    if (!last->isSharedOne()) {
+      uint32_t avail = last->tailroom();
+      if (avail >= min) {
+        return make_pair(
+            last->writableTail(), std::min(max, avail));
+      }
+    }
+  }
+  // Allocate a new buffer of the requested max size.
+  unique_ptr<IOBuf> newBuf(IOBuf::create(max));
+  appendToChain(head_, std::move(newBuf));
+  IOBuf* last = head_->prev();
+  return make_pair(last->writableTail(),
+                   std::min(max, last->tailroom()));
+}
+
+void
+IOBufQueue::postallocate(uint32_t n) {
+  head_->prev()->append(n);
+  if (options_.cacheChainLength) {
+    chainLength_ += n;
+  }
+}
+
+unique_ptr<IOBuf>
+IOBufQueue::split(size_t n) {
+  unique_ptr<IOBuf> result;
+  while (n != 0) {
+    if (head_ == NULL) {
+      throw std::underflow_error(
+          "Attempt to remove more bytes than are present in IOBufQueue");
+    } else if (head_->length() <= n) {
+      n -= head_->length();
+      if (options_.cacheChainLength) {
+        chainLength_ -= head_->length();
+      }
+      unique_ptr<IOBuf> remainder = head_->pop();
+      appendToChain(result, std::move(head_));
+      head_ = std::move(remainder);
+    } else {
+      unique_ptr<IOBuf> clone = head_->cloneOne();
+      clone->trimEnd(clone->length() - n);
+      appendToChain(result, std::move(clone));
+      head_->trimStart(n);
+      if (options_.cacheChainLength) {
+        chainLength_ -= n;
+      }
+      break;
+    }
+  }
+  return std::move(result);
+}
+
+void IOBufQueue::trimStart(size_t amount) {
+  while (amount > 0) {
+    if (!head_) {
+      throw std::underflow_error(
+          "Attempt to trim more bytes than are present in IOBufQueue");
+    }
+    if (head_->length() > amount) {
+      head_->trimStart(amount);
+      if (options_.cacheChainLength) {
+        chainLength_ -= amount;
+      }
+      break;
+    }
+    amount -= head_->length();
+    if (options_.cacheChainLength) {
+      chainLength_ -= head_->length();
+    }
+    head_ = head_->pop();
+  }
+}
+
+void IOBufQueue::trimEnd(size_t amount) {
+  while (amount > 0) {
+    if (!head_) {
+      throw std::underflow_error(
+          "Attempt to trim more bytes than are present in IOBufQueue");
+    }
+    if (head_->prev()->length() > amount) {
+      head_->prev()->trimEnd(amount);
+      if (options_.cacheChainLength) {
+        chainLength_ -= amount;
+      }
+      break;
+    }
+    amount -= head_->prev()->length();
+    if (options_.cacheChainLength) {
+      chainLength_ -= head_->prev()->length();
+    }
+    unique_ptr<IOBuf> b = head_->prev()->unlink();
+
+    // Null queue if we unlinked the head.
+    if (b.get() == head_.get()) {
+      head_.reset();
+    }
+  }
+}
+
+}  // folly
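To tie the queue operations together, an illustrative sketch (sizes and strings arbitrary, not part of the diff) using the IOBufQueue interface declared in the header below:

    #include <cassert>
    #include "folly/experimental/io/IOBufQueue.h"

    using folly::IOBuf;
    using folly::IOBufQueue;

    void queueSketch() {
      IOBufQueue::Options opts;
      opts.cacheChainLength = true;
      IOBufQueue queue(opts);

      // Copy data in; the queue allocates and chains buffers as needed.
      queue.append("hello ", 6);
      queue.append("world", 5);
      assert(queue.chainLength() == 11);

      // Hand the first 6 bytes to another consumer as a separate chain.
      std::unique_ptr<IOBuf> hello = queue.split(6);
      assert(queue.chainLength() == 5);
    }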
+ */
+
+#ifndef FOLLY_IO_IOBUF_QUEUE_H
+#define FOLLY_IO_IOBUF_QUEUE_H
+
+#include "folly/experimental/io/IOBuf.h"
+
+#include <stdexcept>
+#include <string>
+
+namespace folly {
+
+/**
+ * An IOBufQueue encapsulates a chain of IOBufs and provides
+ * convenience functions to append data to the back of the chain
+ * and remove data from the front.
+ */
+class IOBufQueue {
+ public:
+  struct Options {
+    Options() : cacheChainLength(false) { }
+    bool cacheChainLength;
+  };
+
+  explicit IOBufQueue(const Options& options = Options());
+
+  /**
+   * Add a buffer or buffer chain to the end of this queue. The
+   * queue takes ownership of buf.
+   */
+  void append(std::unique_ptr<folly::IOBuf>&& buf);
+
+  /**
+   * Add a queue to the end of this queue. The queue takes ownership of
+   * all buffers from the other queue.
+   */
+  void append(IOBufQueue& other);
+  void append(IOBufQueue&& other) {
+    append(other);  // call lvalue reference overload, above
+  }
+
+  /**
+   * Copy len bytes, starting at buf, to the end of this queue.
+   * The caller retains ownership of the source data.
+   */
+  void append(const void* buf, size_t len);
+
+  /**
+   * Copy a string to the end of this queue.
+   * The caller retains ownership of the source data.
+   */
+  void append(const std::string& buf) {
+    append(buf.data(), buf.length());
+  }
+
+  /**
+   * Obtain a writable block of contiguous bytes at the end of this
+   * queue, allocating more space if necessary.  The amount of space
+   * reserved will be between min and max, inclusive; the IOBufQueue
+   * implementation may pick a value in that range that makes efficient
+   * use of already-allocated internal space.
+   *
+   * If the caller subsequently writes anything into the returned space,
+   * it must call the postallocate() method.
+   *
+   * @return The starting address of the block and the length in bytes.
+   *
+   * @note The point of the preallocate()/postallocate() mechanism is
+   *       to support I/O APIs such as Thrift's TAsyncSocket::ReadCallback
+   *       that request a buffer from the application and then, in a later
+   *       callback, tell the application how much of the buffer they've
+   *       filled with data.
+   */
+  std::pair<void*, uint32_t> preallocate(uint32_t min, uint32_t max);
+
+  /**
+   * Tell the queue that the caller has written data into the first n
+   * bytes provided by the previous preallocate() call.
+   *
+   * @note n should be less than or equal to the size returned by
+   *       preallocate().  If n is zero, the caller may skip the call
+   *       to postallocate().  If n is nonzero, the caller must not
+   *       invoke any other non-const methods on this IOBufQueue between
+   *       the call to preallocate() and the call to postallocate().
+   */
+  void postallocate(uint32_t n);
+
+  /**
+   * Split off the first n bytes of the queue into a separate IOBuf chain,
+   * and transfer ownership of the new chain to the caller.  The IOBufQueue
+   * retains ownership of everything after the split point.
+   *
+   * @warning If the split point lies in the middle of some IOBuf within
+   *          the chain, this function may, as an implementation detail,
+   *          clone that IOBuf.
+   *
+   * @throws std::underflow_error if n exceeds the number of bytes
+   *         in the queue.
+   */
+  std::unique_ptr<folly::IOBuf> split(size_t n);
+
+  /**
+   * Similar to IOBuf::trimStart, but works on the whole queue.  Will
+   * pop off buffers that have been completely trimmed.
+   */
+  void trimStart(size_t amount);
+
+  /**
+   * Similar to IOBuf::trimEnd, but works on the whole queue.  Will
+   * pop off buffers that have been completely trimmed.
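+   *
+   * Illustrative usage sketch (commentary added here, beyond the original
+   * docs), showing the preallocate()/postallocate() pattern and the trims:
+   *
+   *   IOBufQueue q;
+   *   q.append("hello", 5);            // queue now holds "hello"
+   *   auto ws = q.preallocate(1, 64);  // writable space at the tail
+   *   memcpy(ws.first, "!", 1);
+   *   q.postallocate(1);               // commit one byte: "hello!"
+   *   q.trimStart(2);                  // drop "he"
+   *   q.trimEnd(1);                    // drop "!"; "llo" remains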
+ */
+  void trimEnd(size_t amount);
+
+  /**
+   * Transfer ownership of the queue's entire IOBuf chain to the caller.
+   */
+  std::unique_ptr<folly::IOBuf>&& move() {
+    chainLength_ = 0;
+    return std::move(head_);
+  }
+
+  /**
+   * Access the front IOBuf of the chain; ownership stays with the queue.
+   */
+  const folly::IOBuf* front() const {
+    return head_.get();
+  }
+
+  /**
+   * Total chain length, only valid if cacheChainLength was specified in
+   * the constructor.
+   */
+  size_t chainLength() const {
+    if (!options_.cacheChainLength) {
+      throw std::invalid_argument("IOBufQueue: chain length not cached");
+    }
+    return chainLength_;
+  }
+
+  const Options& options() const {
+    return options_;
+  }
+
+  /** Movable */
+  IOBufQueue(IOBufQueue&&);
+  IOBufQueue& operator=(IOBufQueue&&);
+
+ private:
+  static const size_t kChainLengthNotCached = (size_t)-1;
+  /** Not copyable */
+  IOBufQueue(const IOBufQueue&) = delete;
+  IOBufQueue& operator=(const IOBufQueue&) = delete;
+
+  Options options_;
+  size_t chainLength_;
+  /** Everything that has been appended but not yet discarded or moved out */
+  std::unique_ptr<folly::IOBuf> head_;
+};
+
+} // folly
+
+#endif // FOLLY_IO_IOBUF_QUEUE_H
diff --git a/folly/experimental/io/test/IOBufCursorTest.cpp b/folly/experimental/io/test/IOBufCursorTest.cpp
new file mode 100644
index 00000000..3ed91f04
--- /dev/null
+++ b/folly/experimental/io/test/IOBufCursorTest.cpp
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "folly/experimental/io/IOBuf.h" + +#include +#include +#include +#include "folly/Benchmark.h" +#include "folly/Range.h" +#include "folly/experimental/io/Cursor.h" + +DECLARE_bool(benchmark); + +using folly::IOBuf; +using std::unique_ptr; +using namespace folly::io; + +TEST(IOBuf, RWCursor) { + unique_ptr iobuf1(IOBuf::create(20)); + iobuf1->append(20); + unique_ptr iobuf2(IOBuf::create(20)); + iobuf2->append(20); + + IOBuf* iob2ptr = iobuf2.get(); + iobuf1->prependChain(std::move(iobuf2)); + + EXPECT_TRUE(iobuf1->isChained()); + + RWPrivateCursor wcursor(iobuf1.get()); + Cursor rcursor(iobuf1.get()); + wcursor.writeLE((uint64_t)1); + wcursor.writeLE((uint64_t)1); + wcursor.writeLE((uint64_t)1); + wcursor.write((uint8_t)1); + + EXPECT_EQ(1, rcursor.readLE()); + rcursor.skip(8); + EXPECT_EQ(1, rcursor.readLE()); + rcursor.skip(0); + EXPECT_EQ(0, rcursor.read()); + EXPECT_EQ(0, rcursor.read()); + EXPECT_EQ(0, rcursor.read()); + EXPECT_EQ(0, rcursor.read()); + EXPECT_EQ(1, rcursor.read()); +} + +TEST(IOBuf, skip) { + unique_ptr iobuf1(IOBuf::create(20)); + iobuf1->append(20); + RWPrivateCursor wcursor(iobuf1.get()); + wcursor.write((uint8_t)1); + wcursor.write((uint8_t)2); + Cursor cursor(iobuf1.get()); + cursor.skip(1); + EXPECT_EQ(2, cursor.read()); +} + +TEST(IOBuf, reset) { + unique_ptr iobuf1(IOBuf::create(20)); + iobuf1->append(20); + RWPrivateCursor wcursor(iobuf1.get()); + wcursor.write((uint8_t)1); + wcursor.write((uint8_t)2); + wcursor.reset(iobuf1.get()); + EXPECT_EQ(1, wcursor.read()); +} + +TEST(IOBuf, copy_assign_convert) { + unique_ptr iobuf1(IOBuf::create(20)); + iobuf1->append(20); + RWPrivateCursor wcursor(iobuf1.get()); + RWPrivateCursor cursor2(wcursor); + RWPrivateCursor cursor3(iobuf1.get()); + + wcursor.write((uint8_t)1); + cursor3 = wcursor; + wcursor.write((uint8_t)2); + Cursor cursor4(wcursor); + RWPrivateCursor cursor5(wcursor); + wcursor.write((uint8_t)3); + + EXPECT_EQ(1, cursor2.read()); + EXPECT_EQ(2, cursor3.read()); + EXPECT_EQ(3, cursor4.read()); +} + +TEST(IOBuf, overloading) { + unique_ptr iobuf1(IOBuf::create(20)); + iobuf1->append(20); + RWPrivateCursor wcursor(iobuf1.get()); + wcursor += 1; + wcursor.write((uint8_t)1); + Cursor cursor(iobuf1.get()); + cursor += 1; + EXPECT_EQ(1, cursor.read()); +} + +TEST(IOBuf, endian) { + unique_ptr iobuf1(IOBuf::create(20)); + iobuf1->append(20); + RWPrivateCursor wcursor(iobuf1.get()); + Cursor rcursor(iobuf1.get()); + uint16_t v = 1; + int16_t vu = -1; + wcursor.writeBE(v); + wcursor.writeBE(vu); + // Try a couple combinations to ensure they were generated correctly + wcursor.writeBE(vu); + wcursor.writeLE(vu); + wcursor.writeLE(vu); + wcursor.writeLE(v); + EXPECT_EQ(v, rcursor.readBE()); +} + +TEST(IOBuf, Cursor) { + unique_ptr iobuf1(IOBuf::create(1)); + iobuf1->append(1); + RWPrivateCursor c(iobuf1.get()); + c.write((uint8_t)40); // OK + try { + c.write((uint8_t)10); // Bad write, checked should except. + EXPECT_EQ(true, false); + } catch (...) { + } +} + +TEST(IOBuf, UnshareCursor) { + uint8_t buf = 0; + unique_ptr iobuf1(IOBuf::wrapBuffer(&buf, 1)); + unique_ptr iobuf2(IOBuf::wrapBuffer(&buf, 1)); + RWUnshareCursor c1(iobuf1.get()); + RWUnshareCursor c2(iobuf2.get()); + + c1.write((uint8_t)10); // This should duplicate the two buffers. 
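+  // Both cursors wrap the same user-owned byte, so the buffers count as
+  // shared.  RWUnshareCursor copies the data into a private buffer before
+  // writing, which is why c2 still reads the original value (0) below.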
+ uint8_t t = c2.read(); + EXPECT_EQ(0, t); + + iobuf1 = IOBuf::wrapBuffer(&buf, 1); + iobuf2 = IOBuf::wrapBuffer(&buf, 1); + RWPrivateCursor c3(iobuf1.get()); + RWPrivateCursor c4(iobuf2.get()); + + c3.write((uint8_t)10); // This should _not_ duplicate the two buffers. + t = c4.read(); + EXPECT_EQ(10, t); +} + +namespace { +void append(std::unique_ptr& buf, folly::StringPiece data) { + EXPECT_LE(data.size(), buf->tailroom()); + memcpy(buf->writableData(), data.data(), data.size()); + buf->append(data.size()); +} + +void append(Appender& appender, folly::StringPiece data) { + appender.push(reinterpret_cast(data.data()), data.size()); +} + +std::string toString(const IOBuf& buf) { + std::string str; + Cursor cursor(&buf); + std::pair p; + while ((p = cursor.peek()).second) { + str.append(reinterpret_cast(p.first), p.second); + cursor.skip(p.second); + } + return str; +} + +} // namespace + +TEST(IOBuf, PullAndPeek) { + std::unique_ptr iobuf1(IOBuf::create(10)); + append(iobuf1, "he"); + std::unique_ptr iobuf2(IOBuf::create(10)); + append(iobuf2, "llo "); + std::unique_ptr iobuf3(IOBuf::create(10)); + append(iobuf3, "world"); + iobuf1->prependChain(std::move(iobuf2)); + iobuf1->prependChain(std::move(iobuf3)); + EXPECT_EQ(3, iobuf1->countChainElements()); + EXPECT_EQ(11, iobuf1->computeChainDataLength()); + + char buf[12]; + memset(buf, 0, sizeof(buf)); + Cursor(iobuf1.get()).pull(buf, 11); + EXPECT_EQ("hello world", std::string(buf)); + + memset(buf, 0, sizeof(buf)); + EXPECT_EQ(11, Cursor(iobuf1.get()).pullAtMost(buf, 20)); + EXPECT_EQ("hello world", std::string(buf)); + + EXPECT_THROW({Cursor(iobuf1.get()).pull(buf, 20);}, + std::out_of_range); + + { + RWPrivateCursor cursor(iobuf1.get()); + auto p = cursor.peek(); + EXPECT_EQ("he", std::string(reinterpret_cast(p.first), + p.second)); + cursor.skip(p.second); + p = cursor.peek(); + EXPECT_EQ("llo ", std::string(reinterpret_cast(p.first), + p.second)); + cursor.skip(p.second); + p = cursor.peek(); + EXPECT_EQ("world", std::string(reinterpret_cast(p.first), + p.second)); + cursor.skip(p.second); + EXPECT_EQ(3, iobuf1->countChainElements()); + EXPECT_EQ(11, iobuf1->computeChainDataLength()); + } + + { + RWPrivateCursor cursor(iobuf1.get()); + cursor.gather(11); + auto p = cursor.peek(); + EXPECT_EQ("hello world", std::string(reinterpret_cast(p.first), p.second)); + EXPECT_EQ(1, iobuf1->countChainElements()); + EXPECT_EQ(11, iobuf1->computeChainDataLength()); + } +} + +TEST(IOBuf, Appender) { + std::unique_ptr head(IOBuf::create(10)); + append(head, "hello"); + + Appender app(head.get(), 10); + uint32_t cap = head->capacity(); + uint32_t len1 = app.length(); + EXPECT_EQ(cap - 5, len1); + app.ensure(len1); // won't grow + EXPECT_EQ(len1, app.length()); + app.ensure(len1 + 1); // will grow + EXPECT_LE(len1 + 1, app.length()); + + append(app, " world"); + EXPECT_EQ("hello world", toString(*head)); +} + +int benchmark_size = 1000; +unique_ptr iobuf_benchmark; + +unique_ptr iobuf_read_benchmark; + +template +void runBenchmark() { + CursClass c(iobuf_benchmark.get()); + + for(int i = 0; i < benchmark_size; i++) { + c.write((uint8_t)0); + } +} + +BENCHMARK(rwPrivateCursorBenchmark, iters) { + while (--iters) { + runBenchmark(); + } +} + +BENCHMARK(rwUnshareCursorBenchmark, iters) { + while (--iters) { + runBenchmark(); + } +} + + +BENCHMARK(cursorBenchmark, iters) { + while (--iters) { + Cursor c(iobuf_read_benchmark.get()); + for(int i = 0; i < benchmark_size ; i++) { + c.read(); + } + } +} + +BENCHMARK(skipBenchmark, iters) { + uint8_t buf; + 
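+  // Note: buf is unused here, and the "while (--iters)" idiom runs one
+  // pass fewer than iters; the benchmarks above share both quirks, so
+  // their relative timings stay comparable.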
while (--iters) { + Cursor c(iobuf_read_benchmark.get()); + for(int i = 0; i < benchmark_size ; i++) { + c.peek(); + c.skip(1); + } + } +} + +// fbmake opt +// _bin/folly/experimental/io/test/iobuf_cursor_test -benchmark +// +// Benchmark Iters Total t t/iter iter/sec +// --------------------------------------------------------------------------- +// rwPrivateCursorBenchmark 100000 142.9 ms 1.429 us 683.5 k +// rwUnshareCursorBenchmark 100000 309.3 ms 3.093 us 315.7 k +// cursorBenchmark 100000 741.4 ms 7.414 us 131.7 k +// skipBenchmark 100000 738.9 ms 7.389 us 132.2 k +// +// uname -a: +// +// Linux dev2159.snc6.facebook.com 2.6.33-7_fbk15_104e4d0 #1 SMP +// Tue Oct 19 22:40:30 PDT 2010 x86_64 x86_64 x86_64 GNU/Linux +// +// 72GB RAM, 2 CPUs (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) +// hyperthreading disabled + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + + auto ret = RUN_ALL_TESTS(); + + if (ret == 0 && FLAGS_benchmark) { + iobuf_benchmark = IOBuf::create(benchmark_size); + iobuf_benchmark->append(benchmark_size); + + iobuf_read_benchmark = IOBuf::create(1); + for (int i = 0; i < benchmark_size; i++) { + unique_ptr iobuf2(IOBuf::create(1)); + iobuf2->append(1); + iobuf_read_benchmark->prependChain(std::move(iobuf2)); + } + + folly::runBenchmarks(); + } + + return ret; +} diff --git a/folly/experimental/io/test/IOBufQueueTest.cpp b/folly/experimental/io/test/IOBufQueueTest.cpp new file mode 100644 index 00000000..58f727a5 --- /dev/null +++ b/folly/experimental/io/test/IOBufQueueTest.cpp @@ -0,0 +1,231 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/experimental/io/IOBufQueue.h" + +#include +#include + +#include +#include +#include + +using folly::IOBuf; +using folly::IOBufQueue; +using std::pair; +using std::string; +using std::unique_ptr; + +// String Comma Length macro for string literals +#define SCL(x) (x), sizeof(x) - 1 + +namespace { + +IOBufQueue::Options clOptions; +struct Initializer { + Initializer() { + clOptions.cacheChainLength = true; + } +}; +Initializer initializer; + +unique_ptr +stringToIOBuf(const char* s, uint32_t len) { + unique_ptr buf = IOBuf::create(len); + memcpy(buf->writableTail(), s, len); + buf->append(len); + return std::move(buf); +} + +void checkConsistency(const IOBufQueue& queue) { + if (queue.options().cacheChainLength) { + size_t len = queue.front() ? 
queue.front()->computeChainDataLength() : 0; + EXPECT_EQ(len, queue.chainLength()); + } +} + +} + +TEST(IOBufQueue, Simple) { + IOBufQueue queue(clOptions); + EXPECT_EQ(NULL, queue.front()); + queue.append(SCL("")); + EXPECT_EQ(NULL, queue.front()); + queue.append(unique_ptr()); + EXPECT_EQ(NULL, queue.front()); + string emptyString; + queue.append(emptyString); + EXPECT_EQ(NULL, queue.front()); +} + +TEST(IOBufQueue, Append) { + IOBufQueue queue(clOptions); + queue.append(SCL("Hello")); + IOBufQueue queue2(clOptions); + queue2.append(SCL(", ")); + queue2.append(SCL("World")); + checkConsistency(queue); + checkConsistency(queue2); + queue.append(queue2.move()); + checkConsistency(queue); + checkConsistency(queue2); + const IOBuf* chain = queue.front(); + EXPECT_NE((IOBuf*)NULL, chain); + EXPECT_EQ(12, chain->computeChainDataLength()); + EXPECT_EQ(NULL, queue2.front()); +} + +TEST(IOBufQueue, Append2) { + IOBufQueue queue(clOptions); + queue.append(SCL("Hello")); + IOBufQueue queue2(clOptions); + queue2.append(SCL(", ")); + queue2.append(SCL("World")); + checkConsistency(queue); + checkConsistency(queue2); + queue.append(queue2); + checkConsistency(queue); + checkConsistency(queue2); + const IOBuf* chain = queue.front(); + EXPECT_NE((IOBuf*)NULL, chain); + EXPECT_EQ(12, chain->computeChainDataLength()); + EXPECT_EQ(NULL, queue2.front()); +} + +TEST(IOBufQueue, Split) { + IOBufQueue queue(clOptions); + queue.append(stringToIOBuf(SCL("Hello"))); + queue.append(stringToIOBuf(SCL(","))); + queue.append(stringToIOBuf(SCL(" "))); + queue.append(stringToIOBuf(SCL(""))); + queue.append(stringToIOBuf(SCL("World"))); + checkConsistency(queue); + EXPECT_EQ(12, queue.front()->computeChainDataLength()); + + unique_ptr prefix(queue.split(1)); + checkConsistency(queue); + EXPECT_EQ(1, prefix->computeChainDataLength()); + EXPECT_EQ(11, queue.front()->computeChainDataLength()); + prefix = queue.split(2); + checkConsistency(queue); + EXPECT_EQ(2, prefix->computeChainDataLength()); + EXPECT_EQ(9, queue.front()->computeChainDataLength()); + prefix = queue.split(3); + checkConsistency(queue); + EXPECT_EQ(3, prefix->computeChainDataLength()); + EXPECT_EQ(6, queue.front()->computeChainDataLength()); + prefix = queue.split(1); + checkConsistency(queue); + EXPECT_EQ(1, prefix->computeChainDataLength()); + EXPECT_EQ(5, queue.front()->computeChainDataLength()); + prefix = queue.split(5); + checkConsistency(queue); + EXPECT_EQ(5, prefix->computeChainDataLength()); + EXPECT_EQ((IOBuf*)NULL, queue.front()); + + queue.append(stringToIOBuf(SCL("Hello,"))); + queue.append(stringToIOBuf(SCL(" World"))); + checkConsistency(queue); + bool exceptionFired = false; + EXPECT_THROW({prefix = queue.split(13);}, std::underflow_error); + checkConsistency(queue); +} + +TEST(IOBufQueue, Preallocate) { + IOBufQueue queue(clOptions); + queue.append(string("Hello")); + pair writable = queue.preallocate(2, 64); + checkConsistency(queue); + EXPECT_NE((void*)NULL, writable.first); + EXPECT_LE(2, writable.second); + EXPECT_GE(64, writable.second); + memcpy(writable.first, SCL(", ")); + queue.postallocate(2); + checkConsistency(queue); + EXPECT_EQ(7, queue.front()->computeChainDataLength()); + queue.append(SCL("World")); + checkConsistency(queue); + EXPECT_EQ(12, queue.front()->computeChainDataLength()); + writable = queue.preallocate(1024, 4096); + checkConsistency(queue); + EXPECT_LE(1024, writable.second); + EXPECT_GE(4096, writable.second); +} + +TEST(IOBufQueue, trim) { + IOBufQueue queue(clOptions); + unique_ptr a = IOBuf::create(4); + 
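+  // Build a four-buffer chain with lengths 4, 6, 8 and 10 (28 bytes in
+  // total) so that the trims below cross IOBuf boundaries.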
a->append(4); + queue.append(std::move(a)); + checkConsistency(queue); + a = IOBuf::create(6); + a->append(6); + queue.append(std::move(a)); + checkConsistency(queue); + a = IOBuf::create(8); + a->append(8); + queue.append(std::move(a)); + checkConsistency(queue); + a = IOBuf::create(10); + a->append(10); + queue.append(std::move(a)); + checkConsistency(queue); + + EXPECT_EQ(4, queue.front()->countChainElements()); + EXPECT_EQ(28, queue.front()->computeChainDataLength()); + EXPECT_EQ(4, queue.front()->length()); + + queue.trimStart(1); + checkConsistency(queue); + EXPECT_EQ(4, queue.front()->countChainElements()); + EXPECT_EQ(27, queue.front()->computeChainDataLength()); + EXPECT_EQ(3, queue.front()->length()); + + queue.trimStart(5); + checkConsistency(queue); + EXPECT_EQ(3, queue.front()->countChainElements()); + EXPECT_EQ(22, queue.front()->computeChainDataLength()); + EXPECT_EQ(4, queue.front()->length()); + + queue.trimEnd(1); + checkConsistency(queue); + EXPECT_EQ(3, queue.front()->countChainElements()); + EXPECT_EQ(21, queue.front()->computeChainDataLength()); + EXPECT_EQ(9, queue.front()->prev()->length()); + + queue.trimEnd(20); + checkConsistency(queue); + EXPECT_EQ(1, queue.front()->countChainElements()); + EXPECT_EQ(1, queue.front()->computeChainDataLength()); + EXPECT_EQ(1, queue.front()->prev()->length()); + + queue.trimEnd(1); + checkConsistency(queue); + EXPECT_EQ(NULL, queue.front()); + + EXPECT_THROW(queue.trimStart(2), std::underflow_error); + checkConsistency(queue); + + EXPECT_THROW(queue.trimEnd(30), std::underflow_error); + checkConsistency(queue); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + + return RUN_ALL_TESTS(); +} diff --git a/folly/experimental/io/test/IOBufTest.cpp b/folly/experimental/io/test/IOBufTest.cpp new file mode 100644 index 00000000..98ac2081 --- /dev/null +++ b/folly/experimental/io/test/IOBufTest.cpp @@ -0,0 +1,525 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/experimental/io/IOBuf.h" + +#include +#include +#include + +#include "folly/Malloc.h" +#include "folly/Range.h" + +using folly::IOBuf; +using folly::StringPiece; +using std::unique_ptr; + +void append(std::unique_ptr& buf, StringPiece str) { + EXPECT_LE(str.size(), buf->tailroom()); + memcpy(buf->writableData(), str.data(), str.size()); + buf->append(str.size()); +} + +TEST(IOBuf, Simple) { + unique_ptr buf(IOBuf::create(100)); + uint32_t cap = buf->capacity(); + EXPECT_LE(100, cap); + EXPECT_EQ(0, buf->headroom()); + EXPECT_EQ(0, buf->length()); + EXPECT_EQ(cap, buf->tailroom()); + + append(buf, "hello"); + buf->advance(10); + EXPECT_EQ(10, buf->headroom()); + EXPECT_EQ(5, buf->length()); + EXPECT_EQ(cap - 15, buf->tailroom()); + const char* p = reinterpret_cast(buf->data()); + EXPECT_EQ("hello", std::string(p, buf->length())); + + buf->clear(); + EXPECT_EQ(0, buf->headroom()); + EXPECT_EQ(0, buf->length()); + EXPECT_EQ(cap, buf->tailroom()); +} + + +void testAllocSize(uint32_t requestedCapacity) { + unique_ptr iobuf(IOBuf::create(requestedCapacity)); + EXPECT_GE(iobuf->capacity(), requestedCapacity); +} + +TEST(IOBuf, AllocSizes) { + // Try with a small allocation size that should fit in the internal buffer + testAllocSize(28); + + // Try with a large allocation size that will require an external buffer. + testAllocSize(9000); + + // 220 bytes is currently the cutoff + // (It would be nice to use the IOBuf::kMaxInternalDataSize constant, + // but it's private and it doesn't seem worth making it public just for this + // test code.) + testAllocSize(220); + testAllocSize(219); + testAllocSize(221); +} + +void deleteArrayBuffer(void *buf, void* arg) { + uint32_t* deleteCount = static_cast(arg); + ++(*deleteCount); + uint8_t* bufPtr = static_cast(buf); + delete[] bufPtr; +} + +TEST(IOBuf, TakeOwnership) { + uint32_t size1 = 99; + uint8_t *buf1 = static_cast(malloc(size1)); + unique_ptr iobuf1(IOBuf::takeOwnership(buf1, size1)); + EXPECT_EQ(buf1, iobuf1->data()); + EXPECT_EQ(size1, iobuf1->length()); + EXPECT_EQ(buf1, iobuf1->buffer()); + EXPECT_EQ(size1, iobuf1->capacity()); + + uint32_t deleteCount = 0; + uint32_t size2 = 4321; + uint8_t *buf2 = new uint8_t[size2]; + unique_ptr iobuf2(IOBuf::takeOwnership(buf2, size2, + deleteArrayBuffer, + &deleteCount)); + EXPECT_EQ(buf2, iobuf2->data()); + EXPECT_EQ(size2, iobuf2->length()); + EXPECT_EQ(buf2, iobuf2->buffer()); + EXPECT_EQ(size2, iobuf2->capacity()); + EXPECT_EQ(0, deleteCount); + iobuf2.reset(); + EXPECT_EQ(1, deleteCount); +} + +TEST(IOBuf, WrapBuffer) { + const uint32_t size1 = 1234; + uint8_t buf1[size1]; + unique_ptr iobuf1(IOBuf::wrapBuffer(buf1, size1)); + EXPECT_EQ(buf1, iobuf1->data()); + EXPECT_EQ(size1, iobuf1->length()); + EXPECT_EQ(buf1, iobuf1->buffer()); + EXPECT_EQ(size1, iobuf1->capacity()); + + uint32_t size2 = 0x1234; + unique_ptr buf2(new uint8_t[size2]); + unique_ptr iobuf2(IOBuf::wrapBuffer(buf2.get(), size2)); + EXPECT_EQ(buf2.get(), iobuf2->data()); + EXPECT_EQ(size2, iobuf2->length()); + EXPECT_EQ(buf2.get(), iobuf2->buffer()); + EXPECT_EQ(size2, iobuf2->capacity()); +} + +void fillBuf(uint8_t* buf, uint32_t length, boost::mt19937& gen) { + for (uint32_t n = 0; n < length; ++n) { + buf[n] = static_cast(gen() & 0xff); + } +} + +void fillBuf(IOBuf* buf, boost::mt19937& gen) { + buf->unshare(); + fillBuf(buf->writableData(), buf->length(), gen); +} + +void checkBuf(const uint8_t* buf, uint32_t length, boost::mt19937& gen) { + // Rather than using EXPECT_EQ() to check each character, + // 
count the number of differences and the first character that differs. + // This way on error we'll report just that information, rather than tons of + // failed checks for each byte in the buffer. + uint32_t numDifferences = 0; + uint32_t firstDiffIndex = 0; + uint8_t firstDiffExpected = 0; + for (uint32_t n = 0; n < length; ++n) { + uint8_t expected = static_cast(gen() & 0xff); + if (buf[n] == expected) { + continue; + } + + if (numDifferences == 0) { + firstDiffIndex = n; + firstDiffExpected = expected; + } + ++numDifferences; + } + + EXPECT_EQ(0, numDifferences); + if (numDifferences > 0) { + // Cast to int so it will be printed numerically + // rather than as a char if the check fails + EXPECT_EQ(static_cast(buf[firstDiffIndex]), + static_cast(firstDiffExpected)); + } +} + +void checkBuf(IOBuf* buf, boost::mt19937& gen) { + checkBuf(buf->data(), buf->length(), gen); +} + +void checkChain(IOBuf* buf, boost::mt19937& gen) { + IOBuf *current = buf; + do { + checkBuf(current->data(), current->length(), gen); + current = current->next(); + } while (current != buf); +} + +TEST(IOBuf, Chaining) { + uint32_t fillSeed = 0x12345678; + boost::mt19937 gen(fillSeed); + + // An IOBuf with external storage + uint32_t headroom = 123; + unique_ptr iob1(IOBuf::create(2048)); + iob1->advance(headroom); + iob1->append(1500); + fillBuf(iob1.get(), gen); + + // An IOBuf with internal storage + unique_ptr iob2(IOBuf::create(20)); + iob2->append(20); + fillBuf(iob2.get(), gen); + + // An IOBuf around a buffer it doesn't own + uint8_t localbuf[1234]; + fillBuf(localbuf, 1234, gen); + unique_ptr iob3(IOBuf::wrapBuffer(localbuf, sizeof(localbuf))); + + // An IOBuf taking ownership of a user-supplied buffer + uint32_t heapBufSize = 900; + uint8_t* heapBuf = static_cast(malloc(heapBufSize)); + fillBuf(heapBuf, heapBufSize, gen); + unique_ptr iob4(IOBuf::takeOwnership(heapBuf, heapBufSize)); + + // An IOBuf taking ownership of a user-supplied buffer with + // a custom free function + uint32_t arrayBufSize = 321; + uint8_t* arrayBuf = new uint8_t[arrayBufSize]; + fillBuf(arrayBuf, arrayBufSize, gen); + uint32_t arrayBufFreeCount = 0; + unique_ptr iob5(IOBuf::takeOwnership(arrayBuf, arrayBufSize, + deleteArrayBuffer, + &arrayBufFreeCount)); + + EXPECT_FALSE(iob1->isChained()); + EXPECT_FALSE(iob2->isChained()); + EXPECT_FALSE(iob3->isChained()); + EXPECT_FALSE(iob4->isChained()); + EXPECT_FALSE(iob5->isChained()); + + EXPECT_FALSE(iob1->isSharedOne()); + EXPECT_FALSE(iob2->isSharedOne()); + EXPECT_TRUE(iob3->isSharedOne()); // since we own the buffer + EXPECT_FALSE(iob4->isSharedOne()); + EXPECT_FALSE(iob5->isSharedOne()); + + // Chain the buffers all together + // Since we are going to relinquish ownership of iob2-5 to the chain, + // store raw pointers to them so we can reference them later. 
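+  //
+  // Reminder: on the head of a chain, prependChain(x) splices x in just
+  // before the head of the circular list (i.e. at the tail), while
+  // appendChain(x) splices x in right after *this.  The interleaved calls
+  // below therefore yield the order iob1 -> iob2 -> iob3 -> iob4 -> iob5,
+  // as the next()/prev() checks verify.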
+ IOBuf* iob2ptr = iob2.get(); + IOBuf* iob3ptr = iob3.get(); + IOBuf* iob4ptr = iob4.get(); + IOBuf* iob5ptr = iob5.get(); + + iob1->prependChain(std::move(iob2)); + iob1->prependChain(std::move(iob4)); + iob2ptr->appendChain(std::move(iob3)); + iob1->prependChain(std::move(iob5)); + + EXPECT_EQ(iob2ptr, iob1->next()); + EXPECT_EQ(iob3ptr, iob2ptr->next()); + EXPECT_EQ(iob4ptr, iob3ptr->next()); + EXPECT_EQ(iob5ptr, iob4ptr->next()); + EXPECT_EQ(iob1.get(), iob5ptr->next()); + + EXPECT_EQ(iob5ptr, iob1->prev()); + EXPECT_EQ(iob1.get(), iob2ptr->prev()); + EXPECT_EQ(iob2ptr, iob3ptr->prev()); + EXPECT_EQ(iob3ptr, iob4ptr->prev()); + EXPECT_EQ(iob4ptr, iob5ptr->prev()); + + EXPECT_TRUE(iob1->isChained()); + EXPECT_TRUE(iob2ptr->isChained()); + EXPECT_TRUE(iob3ptr->isChained()); + EXPECT_TRUE(iob4ptr->isChained()); + EXPECT_TRUE(iob5ptr->isChained()); + + uint64_t fullLength = (iob1->length() + iob2ptr->length() + + iob3ptr->length() + iob4ptr->length() + + iob5ptr->length()); + EXPECT_EQ(5, iob1->countChainElements()); + EXPECT_EQ(fullLength, iob1->computeChainDataLength()); + + // Since iob3 is shared, the entire buffer should report itself as shared + EXPECT_TRUE(iob1->isShared()); + // Unshare just iob3 + iob3ptr->unshareOne(); + EXPECT_FALSE(iob3ptr->isSharedOne()); + // Now everything in the chain should be unshared. + // Check on all members of the chain just for good measure + EXPECT_FALSE(iob1->isShared()); + EXPECT_FALSE(iob2ptr->isShared()); + EXPECT_FALSE(iob3ptr->isShared()); + EXPECT_FALSE(iob4ptr->isShared()); + EXPECT_FALSE(iob5ptr->isShared()); + + + // Clone one of the IOBufs in the chain + unique_ptr iob4clone = iob4ptr->cloneOne(); + gen.seed(fillSeed); + checkBuf(iob1.get(), gen); + checkBuf(iob2ptr, gen); + checkBuf(iob3ptr, gen); + checkBuf(iob4clone.get(), gen); + checkBuf(iob5ptr, gen); + + EXPECT_TRUE(iob1->isShared()); + EXPECT_TRUE(iob2ptr->isShared()); + EXPECT_TRUE(iob3ptr->isShared()); + EXPECT_TRUE(iob4ptr->isShared()); + EXPECT_TRUE(iob5ptr->isShared()); + + EXPECT_FALSE(iob1->isSharedOne()); + EXPECT_FALSE(iob2ptr->isSharedOne()); + EXPECT_FALSE(iob3ptr->isSharedOne()); + EXPECT_TRUE(iob4ptr->isSharedOne()); + EXPECT_FALSE(iob5ptr->isSharedOne()); + + // Unshare that clone + EXPECT_TRUE(iob4clone->isSharedOne()); + iob4clone->unshare(); + EXPECT_FALSE(iob4clone->isSharedOne()); + EXPECT_FALSE(iob4ptr->isSharedOne()); + EXPECT_FALSE(iob1->isShared()); + iob4clone.reset(); + + + // Create a clone of a different IOBuf + EXPECT_FALSE(iob1->isShared()); + EXPECT_FALSE(iob3ptr->isSharedOne()); + + unique_ptr iob3clone = iob3ptr->cloneOne(); + gen.seed(fillSeed); + checkBuf(iob1.get(), gen); + checkBuf(iob2ptr, gen); + checkBuf(iob3clone.get(), gen); + checkBuf(iob4ptr, gen); + checkBuf(iob5ptr, gen); + + EXPECT_TRUE(iob1->isShared()); + EXPECT_TRUE(iob3ptr->isSharedOne()); + EXPECT_FALSE(iob1->isSharedOne()); + + // Delete the clone and make sure the original is unshared + iob3clone.reset(); + EXPECT_FALSE(iob1->isShared()); + EXPECT_FALSE(iob3ptr->isSharedOne()); + + + // Clone the entire chain + unique_ptr chainClone = iob1->clone(); + // Verify that the data is correct. 
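+  // (clone() copies the IOBuf metadata but shares the underlying data
+  // buffers via refcounting, so the clone must report the same total
+  // length and bytes as the original chain.)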
+ EXPECT_EQ(fullLength, chainClone->computeChainDataLength()); + gen.seed(fillSeed); + checkChain(chainClone.get(), gen); + + // Check that the buffers report sharing correctly + EXPECT_TRUE(chainClone->isShared()); + EXPECT_TRUE(iob1->isShared()); + + EXPECT_TRUE(iob1->isSharedOne()); + // since iob2 has a small internal buffer, it will never be shared + EXPECT_FALSE(iob2ptr->isSharedOne()); + EXPECT_TRUE(iob3ptr->isSharedOne()); + EXPECT_TRUE(iob4ptr->isSharedOne()); + EXPECT_TRUE(iob5ptr->isSharedOne()); + + // Unshare the cloned chain + chainClone->unshare(); + EXPECT_FALSE(chainClone->isShared()); + EXPECT_FALSE(iob1->isShared()); + + // Make sure the unshared result still has the same data + EXPECT_EQ(fullLength, chainClone->computeChainDataLength()); + gen.seed(fillSeed); + checkChain(chainClone.get(), gen); + + // Destroy this chain + chainClone.reset(); + + + // Clone a new chain + EXPECT_FALSE(iob1->isShared()); + chainClone = iob1->clone(); + EXPECT_TRUE(iob1->isShared()); + EXPECT_TRUE(chainClone->isShared()); + + // Delete the original chain + iob1.reset(); + EXPECT_FALSE(chainClone->isShared()); + + // Coalesce the chain + // + // Coalescing this chain will create a new buffer and release the last + // refcount on the original buffers we created. Also make sure + // that arrayBufFreeCount increases to one to indicate that arrayBuf was + // freed. + EXPECT_EQ(5, chainClone->countChainElements()); + EXPECT_EQ(0, arrayBufFreeCount); + + // Buffer lengths: 1500 20 1234 900 321 + // Coalesce the first 3 buffers + chainClone->gather(1521); + EXPECT_EQ(3, chainClone->countChainElements()); + EXPECT_EQ(0, arrayBufFreeCount); + + // Make sure the data is still the same after coalescing + EXPECT_EQ(fullLength, chainClone->computeChainDataLength()); + gen.seed(fillSeed); + checkChain(chainClone.get(), gen); + + // Coalesce the entire chain + chainClone->coalesce(); + EXPECT_EQ(1, chainClone->countChainElements()); + EXPECT_EQ(1, arrayBufFreeCount); + + // Make sure the data is still the same after coalescing + EXPECT_EQ(fullLength, chainClone->computeChainDataLength()); + gen.seed(fillSeed); + checkChain(chainClone.get(), gen); + + // Make a new chain to test the unlink and pop operations + iob1 = IOBuf::create(1); + iob1->append(1); + IOBuf *iob1ptr = iob1.get(); + iob2 = IOBuf::create(3); + iob2->append(3); + iob2ptr = iob2.get(); + iob3 = IOBuf::create(5); + iob3->append(5); + iob3ptr = iob3.get(); + iob4 = IOBuf::create(7); + iob4->append(7); + iob4ptr = iob4.get(); + iob1->appendChain(std::move(iob2)); + iob1->prev()->appendChain(std::move(iob3)); + iob1->prev()->appendChain(std::move(iob4)); + EXPECT_EQ(4, iob1->countChainElements()); + EXPECT_EQ(16, iob1->computeChainDataLength()); + + // Unlink from the middle of the chain + iob3 = iob3ptr->unlink(); + EXPECT_TRUE(iob3.get() == iob3ptr); + EXPECT_EQ(3, iob1->countChainElements()); + EXPECT_EQ(11, iob1->computeChainDataLength()); + + // Unlink from the end of the chain + iob4 = iob1->prev()->unlink(); + EXPECT_TRUE(iob4.get() == iob4ptr); + EXPECT_EQ(2, iob1->countChainElements()); + EXPECT_TRUE(iob1->next() == iob2ptr); + EXPECT_EQ(4, iob1->computeChainDataLength()); + + // Pop from the front of the chain + iob2 = iob1->pop(); + EXPECT_TRUE(iob1.get() == iob1ptr); + EXPECT_EQ(1, iob1->countChainElements()); + EXPECT_EQ(1, iob1->computeChainDataLength()); + EXPECT_TRUE(iob2.get() == iob2ptr); + EXPECT_EQ(1, iob2->countChainElements()); + EXPECT_EQ(3, iob2->computeChainDataLength()); +} + +TEST(IOBuf, Reserve) { + uint32_t 
fillSeed = 0x23456789; + boost::mt19937 gen(fillSeed); + + // Reserve does nothing if empty and doesn't have to grow the buffer + { + gen.seed(fillSeed); + unique_ptr iob(IOBuf::create(2000)); + EXPECT_EQ(0, iob->headroom()); + const void* p1 = iob->buffer(); + iob->reserve(5, 15); + EXPECT_LE(5, iob->headroom()); + EXPECT_EQ(p1, iob->buffer()); + } + + // Reserve doesn't reallocate if we have enough total room + { + gen.seed(fillSeed); + unique_ptr iob(IOBuf::create(2000)); + iob->append(100); + fillBuf(iob.get(), gen); + EXPECT_EQ(0, iob->headroom()); + EXPECT_EQ(100, iob->length()); + const void* p1 = iob->buffer(); + const uint8_t* d1 = iob->data(); + iob->reserve(100, 1800); + EXPECT_LE(100, iob->headroom()); + EXPECT_EQ(p1, iob->buffer()); + EXPECT_EQ(d1 + 100, iob->data()); + gen.seed(fillSeed); + checkBuf(iob.get(), gen); + } + + // Reserve reallocates if we don't have enough total room. + // NOTE that, with jemalloc, we know that this won't reallocate in place + // as the size is less than jemallocMinInPlaceExpanadable + { + gen.seed(fillSeed); + unique_ptr iob(IOBuf::create(2000)); + iob->append(100); + fillBuf(iob.get(), gen); + EXPECT_EQ(0, iob->headroom()); + EXPECT_EQ(100, iob->length()); + const void* p1 = iob->buffer(); + const uint8_t* d1 = iob->data(); + iob->reserve(100, 2512); // allocation sizes are multiples of 256 + EXPECT_LE(100, iob->headroom()); + if (folly::usingJEMalloc()) { + EXPECT_NE(p1, iob->buffer()); + } + gen.seed(fillSeed); + checkBuf(iob.get(), gen); + } + + // Test reserve from internal buffer, this used to segfault + { + unique_ptr iob(IOBuf::create(0)); + iob->reserve(0, 2000); + EXPECT_EQ(0, iob->headroom()); + EXPECT_LE(2000, iob->tailroom()); + } +} + +TEST(IOBuf, copyBuffer) { + std::string s("hello"); + auto buf = IOBuf::copyBuffer(s.data(), s.size(), 1, 2); + EXPECT_EQ(1, buf->headroom()); + EXPECT_EQ(s, std::string(reinterpret_cast(buf->data()), + buf->length())); + EXPECT_LE(2, buf->tailroom()); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + + return RUN_ALL_TESTS(); +} diff --git a/folly/experimental/io/test/Makefile.am b/folly/experimental/io/test/Makefile.am new file mode 100644 index 00000000..b6c5f4f5 --- /dev/null +++ b/folly/experimental/io/test/Makefile.am @@ -0,0 +1,12 @@ +ACLOCAL_AMFLAGS = -I m4 + +TESTS = iobuf_test \ + iobuf_cursor_test + +check_PROGRAMS = $(TESTS) + +iobuf_test_SOURCES = IOBufTest.cpp +iobuf_test_LDADD = $(top_builddir)/libfollyio.la + +iobuf_cursor_test_SOURCES = IOBufCursorTest.cpp +iobuf_cursor_test_LDADD = $(top_builddir)/libfollyio.la $(top_builddir)/libfollybenchmark.la diff --git a/folly/experimental/io/test/NetworkBenchmark.cpp b/folly/experimental/io/test/NetworkBenchmark.cpp new file mode 100644 index 00000000..9b8fea72 --- /dev/null +++ b/folly/experimental/io/test/NetworkBenchmark.cpp @@ -0,0 +1,172 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/experimental/io/IOBuf.h" + +#include +#include "folly/Benchmark.h" +#include "folly/experimental/io/Cursor.h" + +#include + +using folly::IOBuf; +using std::unique_ptr; +using namespace folly::io; +using namespace std; + +size_t buf_size = 0; +size_t num_bufs = 0; + +BENCHMARK(reserveBenchmark, iters) { + while (--iters) { + unique_ptr iobuf1(IOBuf::create(buf_size)); + iobuf1->append(buf_size); + for (size_t bufs = num_bufs; bufs > 1; bufs --) { + iobuf1->reserve(0, buf_size); + iobuf1->append(buf_size); + } + } +} + +BENCHMARK(chainBenchmark, iters) { + while (--iters) { + unique_ptr iobuf1(IOBuf::create(buf_size)); + iobuf1->append(buf_size); + for (size_t bufs = num_bufs; bufs > 1; bufs --) { + unique_ptr iobufNext(IOBuf::create(buf_size)); + iobuf1->prependChain(std::move(iobufNext)); + } + } +} + +vector> bufPool; +inline unique_ptr poolGetIOBuf() { + if (bufPool.size() > 0) { + unique_ptr ret = std::move(bufPool.back()); + bufPool.pop_back(); + return std::move(ret); + } else { + unique_ptr iobuf(IOBuf::create(buf_size)); + iobuf->append(buf_size); + return std::move(iobuf); + } +} + +inline void poolPutIOBuf(unique_ptr&& buf) { + unique_ptr head = std::move(buf); + while (head) { + unique_ptr next = std::move(head->pop()); + bufPool.push_back(std::move(head)); + head = std::move(next); + } +} + +BENCHMARK(poolBenchmark, iters) { + while (--iters) { + unique_ptr head = std::move(poolGetIOBuf()); + for (size_t bufs = num_bufs; bufs > 1; bufs --) { + unique_ptr iobufNext = std::move(poolGetIOBuf()); + head->prependChain(std::move(iobufNext)); + } + // cleanup + poolPutIOBuf(std::move(head)); + } +} + +void setNumbers(size_t size, size_t num) { + buf_size = size; + num_bufs = num; + bufPool.clear(); + + printf("\nBuffer size: %zu, number of buffers: %zu\n\n", size, num); +} + +/* +------------------------------------------------------------------------------ +reserveBenchmark 100000 9.186 ms 91.86 ns 10.38 M +chainBenchmark 100000 59.44 ms 594.4 ns 1.604 M +poolBenchmark 100000 15.87 ms 158.7 ns 6.01 M + +Buffer size: 100, number of buffers: 10 + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +reserveBenchmark 100000 62 ms 620 ns 1.538 M +chainBenchmark 100000 59.48 ms 594.8 ns 1.603 M +poolBenchmark 100000 16.07 ms 160.7 ns 5.933 M + +Buffer size: 2048, number of buffers: 10 + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +reserveBenchmark 100000 148.4 ms 1.484 us 658.2 k +chainBenchmark 100000 140.9 ms 1.409 us 693 k +poolBenchmark 100000 16.73 ms 167.3 ns 5.7 M + +Buffer size: 10000, number of buffers: 10 + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +reserveBenchmark 100000 234 ms 2.34 us 417.3 k +chainBenchmark 100000 142.3 ms 1.423 us 686.1 k +poolBenchmark 100000 16.78 ms 167.8 ns 5.684 M + +Buffer size: 100000, number of buffers: 10 + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +reserveBenchmark 100000 186.5 ms 1.865 us 523.5 k +chainBenchmark 100000 360.5 ms 3.605 us 270.9 k +poolBenchmark 100000 16.52 ms 165.2 ns 5.772 M + +Buffer size: 1000000, number of buffers: 10 + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +reserveBenchmark 156 2.084 s 13.36 ms 74.84 +chainBenchmark 30082 
2.001 s 66.5 us 14.68 k +poolBenchmark 100000 18.18 ms 181.8 ns 5.244 M + + +Buffer size: 10, number of buffers: 20 + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +reserveBenchmark 100000 12.54 ms 125.4 ns 7.603 M +chainBenchmark 100000 118.6 ms 1.186 us 823.2 k +poolBenchmark 100000 32.2 ms 322 ns 2.962 M +*/ +int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + + setNumbers(10, 10); + folly::runBenchmarks(); + setNumbers(100, 10); + folly::runBenchmarks(); + setNumbers(2048, 10); + folly::runBenchmarks(); + setNumbers(10000, 10); + folly::runBenchmarks(); + setNumbers(100000, 10); + folly::runBenchmarks(); + setNumbers(1000000, 10); + folly::runBenchmarks(); + + setNumbers(10, 20); + folly::runBenchmarks(); + + return 0; +} diff --git a/folly/experimental/test/BitsTest.cpp b/folly/experimental/test/BitsTest.cpp new file mode 100644 index 00000000..32dad6a2 --- /dev/null +++ b/folly/experimental/test/BitsTest.cpp @@ -0,0 +1,59 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/experimental/Bits.h" + +#include +#include + +using namespace folly; + +TEST(Bits, Simple) { + EXPECT_EQ(0, Bits::blockCount(0)); + EXPECT_EQ(1, Bits::blockCount(1)); + EXPECT_EQ(1, Bits::blockCount(8)); + EXPECT_EQ(2, Bits::blockCount(9)); + EXPECT_EQ(256, Bits::blockCount(2048)); + EXPECT_EQ(257, Bits::blockCount(2049)); + + EXPECT_EQ(4, Bits::blockIndex(39)); + EXPECT_EQ(7, Bits::bitOffset(39)); + EXPECT_EQ(5, Bits::blockIndex(40)); + EXPECT_EQ(0, Bits::bitOffset(40)); + + uint8_t buf[256]; + memset(buf, 0, 256); + + Bits::set(buf, 36); + Bits::set(buf, 39); + EXPECT_EQ((1 << 7) | (1 << 4), buf[4]); + EXPECT_EQ(0, buf[5]); + Bits::clear(buf, 39); + EXPECT_EQ(1 << 4, buf[4]); + EXPECT_EQ(0, buf[5]); + Bits::set(buf, 40); + EXPECT_EQ(1 << 4, buf[4]); + EXPECT_EQ(1, buf[5]); + + EXPECT_EQ(2, Bits::count(buf, buf + 256)); +} + +int main(int argc, char *argv[]) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + return RUN_ALL_TESTS(); +} + diff --git a/folly/experimental/test/TestUtilTest.cpp b/folly/experimental/test/TestUtilTest.cpp new file mode 100644 index 00000000..d28319b2 --- /dev/null +++ b/folly/experimental/test/TestUtilTest.cpp @@ -0,0 +1,52 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/experimental/TestUtil.h" + +#include +#include + +using namespace folly; +using namespace folly::test; + +TEST(TemporaryFile, Simple) { + int fd = -1; + char c = 'x'; + { + TemporaryFile f; + EXPECT_FALSE(f.path().empty()); + EXPECT_EQ('/', f.path()[0]); + fd = f.fd(); + EXPECT_LE(0, fd); + ssize_t r = write(fd, &c, 1); + EXPECT_EQ(1, r); + } + + // The file must have been closed. This assumes that no other thread + // has opened another file in the meanwhile, which is a sane assumption + // to make in this test. + ssize_t r = write(fd, &c, 1); + int savedErrno = errno; + EXPECT_EQ(-1, r); + EXPECT_EQ(EBADF, savedErrno); +} + +int main(int argc, char *argv[]) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + return RUN_ALL_TESTS(); +} + diff --git a/folly/folly-config.h b/folly/folly-config.h new file mode 100644 index 00000000..16a87eed --- /dev/null +++ b/folly/folly-config.h @@ -0,0 +1,303 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _FOLLY_CONFIG_H +#define _FOLLY_CONFIG_H 1 + +/* folly-config.h. Generated automatically at end of configure. */ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* define if the Boost library is available */ +#ifndef FOLLY_HAVE_BOOST +#define FOLLY_HAVE_BOOST /**/ +#endif + +/* define if the Boost::Regex library is available */ +#ifndef FOLLY_HAVE_BOOST_REGEX +#define FOLLY_HAVE_BOOST_REGEX /**/ +#endif + +/* define if the Boost::Thread library is available */ +#ifndef FOLLY_HAVE_BOOST_THREAD +#define FOLLY_HAVE_BOOST_THREAD /**/ +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_DLFCN_H +#define FOLLY_HAVE_DLFCN_H 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_FCNTL_H +#define FOLLY_HAVE_FCNTL_H 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_FEATURES_H +#define FOLLY_HAVE_FEATURES_H 1 +#endif + +/* Define to 1 if you have the `ffsll' function. */ +#ifndef FOLLY_HAVE_FFSLL +#define FOLLY_HAVE_FFSLL 1 +#endif + +/* Define to 1 if you have the `getdelim' function. */ +#ifndef FOLLY_HAVE_GETDELIM +#define FOLLY_HAVE_GETDELIM 1 +#endif + +/* Define to 1 if you have the `gettimeofday' function. */ +#ifndef FOLLY_HAVE_GETTIMEOFDAY +#define FOLLY_HAVE_GETTIMEOFDAY 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_INTTYPES_H +#define FOLLY_HAVE_INTTYPES_H 1 +#endif + +/* Define to 1 if you have the `double_conversion' library + (-ldouble_conversion). */ +#ifndef FOLLY_HAVE_LIBDOUBLE_CONVERSION +#define FOLLY_HAVE_LIBDOUBLE_CONVERSION 1 +#endif + +/* Define to 1 if you have the `gflags' library (-lgflags). */ +#ifndef FOLLY_HAVE_LIBGFLAGS +#define FOLLY_HAVE_LIBGFLAGS 1 +#endif + +/* Define to 1 if you have the `glog' library (-lglog). 
*/ +#ifndef FOLLY_HAVE_LIBGLOG +#define FOLLY_HAVE_LIBGLOG 1 +#endif + +/* Define to 1 if you have the `gtest' library (-lgtest). */ +#ifndef FOLLY_HAVE_LIBGTEST +#define FOLLY_HAVE_LIBGTEST 1 +#endif + +/* Define to 1 if you have the `gtest_main' library (-lgtest_main). */ +#ifndef FOLLY_HAVE_LIBGTEST_MAIN +#define FOLLY_HAVE_LIBGTEST_MAIN 1 +#endif + +/* Define to 1 if you have the `jemalloc' library (-ljemalloc). */ +#ifndef FOLLY_HAVE_LIBJEMALLOC +#define FOLLY_HAVE_LIBJEMALLOC 1 +#endif + +/* Define to 1 if you have the `tcmalloc' library (-ltcmalloc). */ +#ifndef FOLLY_HAVE_LIBTCMALLOC +#define FOLLY_HAVE_LIBTCMALLOC 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_LIMITS_H +#define FOLLY_HAVE_LIMITS_H 1 +#endif + +/* Define to 1 if you have the `malloc_size' function. */ +/* #undef HAVE_MALLOC_SIZE */ + +/* Define to 1 if you have the `malloc_usable_size' function. */ +#ifndef FOLLY_HAVE_MALLOC_USABLE_SIZE +#define FOLLY_HAVE_MALLOC_USABLE_SIZE 1 +#endif + +/* Define to 1 if you have the `memmove' function. */ +#ifndef FOLLY_HAVE_MEMMOVE +#define FOLLY_HAVE_MEMMOVE 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_MEMORY_H +#define FOLLY_HAVE_MEMORY_H 1 +#endif + +/* Define to 1 if you have the `memset' function. */ +#ifndef FOLLY_HAVE_MEMSET +#define FOLLY_HAVE_MEMSET 1 +#endif + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MUTEX_H */ + +/* Define to 1 if you have the `pow' function. */ +#ifndef FOLLY_HAVE_POW +#define FOLLY_HAVE_POW 1 +#endif + +/* Define to 1 if you have the `pthread_yield' function. */ +/* #undef HAVE_PTHREAD_YIELD */ + +/* Define to 1 if the system has the type `ptrdiff_t'. */ +#ifndef FOLLY_HAVE_PTRDIFF_T +#define FOLLY_HAVE_PTRDIFF_T 1 +#endif + +/* Define to 1 if you have the `rallocm' function. */ +/* #undef HAVE_RALLOCM */ + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_SCHED_H +#define FOLLY_HAVE_SCHED_H 1 +#endif + +/* Define to 1 if you have the `sched_yield' function. */ +#ifndef FOLLY_HAVE_SCHED_YIELD +#define FOLLY_HAVE_SCHED_YIELD 1 +#endif + +/* Define to 1 if stdbool.h conforms to C99. */ +#ifndef FOLLY_HAVE_STDBOOL_H +#define FOLLY_HAVE_STDBOOL_H 1 +#endif + +/* Define if g++ supports C++0x features. */ +#ifndef FOLLY_HAVE_STDCXX_0X +#define FOLLY_HAVE_STDCXX_0X /**/ +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_STDINT_H +#define FOLLY_HAVE_STDINT_H 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_STDLIB_H +#define FOLLY_HAVE_STDLIB_H 1 +#endif + +/* Define to 1 if you have the `strerror' function. */ +#ifndef FOLLY_HAVE_STRERROR +#define FOLLY_HAVE_STRERROR 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_STRINGS_H +#define FOLLY_HAVE_STRINGS_H 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_STRING_H +#define FOLLY_HAVE_STRING_H 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_SYS_STAT_H +#define FOLLY_HAVE_SYS_STAT_H 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_SYS_TIME_H +#define FOLLY_HAVE_SYS_TIME_H 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_SYS_TYPES_H +#define FOLLY_HAVE_SYS_TYPES_H 1 +#endif + +/* Define to 1 if you have the header file. */ +#ifndef FOLLY_HAVE_UNISTD_H +#define FOLLY_HAVE_UNISTD_H 1 +#endif + +/* Define to 1 if you have the header file. 
*/ +#ifndef FOLLY_HAVE_MALLOC_H +#define FOLLY_HAVE_MALLOC_H 1 +#endif + +/* Define to 1 if the system has the type `_Bool'. */ +/* #undef HAVE__BOOL */ + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#ifndef FOLLY_LT_OBJDIR +#define FOLLY_LT_OBJDIR ".libs/" +#endif + +/* Name of package */ +#ifndef FOLLY_PACKAGE +#define FOLLY_PACKAGE "folly" +#endif + +/* Define to the address where bug reports for this package should be sent. */ +#ifndef FOLLY_PACKAGE_BUGREPORT +#define FOLLY_PACKAGE_BUGREPORT "folly@fb.com" +#endif + +/* Define to the full name of this package. */ +#ifndef FOLLY_PACKAGE_NAME +#define FOLLY_PACKAGE_NAME "folly" +#endif + +/* Define to the full name and version of this package. */ +#ifndef FOLLY_PACKAGE_STRING +#define FOLLY_PACKAGE_STRING "folly 0.1" +#endif + +/* Define to the one symbol short name of this package. */ +#ifndef FOLLY_PACKAGE_TARNAME +#define FOLLY_PACKAGE_TARNAME "folly" +#endif + +/* Define to the home page for this package. */ +#ifndef FOLLY_PACKAGE_URL +#define FOLLY_PACKAGE_URL "" +#endif + +/* Define to the version of this package. */ +#ifndef FOLLY_PACKAGE_VERSION +#define FOLLY_PACKAGE_VERSION "0.1" +#endif + +/* Define to 1 if you have the ANSI C header files. */ +#ifndef FOLLY_STDC_HEADERS +#define FOLLY_STDC_HEADERS 1 +#endif + +/* Define to 1 if you can safely include both and . */ +#ifndef FOLLY_TIME_WITH_SYS_TIME +#define FOLLY_TIME_WITH_SYS_TIME 1 +#endif + +/* Version number of package */ +#ifndef FOLLY_VERSION +#define FOLLY_VERSION "0.1" +#endif + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + +/* Define to empty if the keyword `volatile' does not work. Warning: valid + code using `volatile' can become incorrect without. Disable with care. */ +/* #undef volatile */ + +/* once: _FOLLY_CONFIG_H */ +#endif diff --git a/folly/json.cpp b/folly/json.cpp new file mode 100644 index 00000000..bf5320d2 --- /dev/null +++ b/folly/json.cpp @@ -0,0 +1,696 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/json.h" +#include +#include +#include + +#include "folly/Range.h" +#include "folly/Unicode.h" +#include "folly/Conv.h" + +namespace folly { + +////////////////////////////////////////////////////////////////////// + +namespace json { +namespace { + +char32_t decodeUtf8(const char*& p, const char* const e) { + /* The following encodings are valid, except for the 5 and 6 byte + * combinations: + * 0xxxxxxx + * 110xxxxx 10xxxxxx + * 1110xxxx 10xxxxxx 10xxxxxx + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + */ + + if (p >= e) { + throw std::runtime_error("folly::decodeUtf8 empty/invalid string"); + } + + unsigned char fst = *p; + if (!(fst & 0x80)) { + // trivial case + return *p++; + } + + static const uint32_t bitMask[] = { + (1 << 7) - 1, + (1 << 11) - 1, + (1 << 16) - 1, + (1 << 21) - 1 + }; + + // upper control bits are masked out later + uint32_t d = fst; + + if ((fst & 0xC0) != 0xC0) { + throw std::runtime_error( + to("folly::decodeUtf8 i=0 d=", d)); + } + + fst <<= 1; + + for (unsigned int i = 1; i != 3 && p + i < e; ++i) { + unsigned char tmp = p[i]; + + if ((tmp & 0xC0) != 0x80) { + throw std::runtime_error( + to("folly::decodeUtf8 i=", i, " tmp=", (uint32_t)tmp)); + } + + d = (d << 6) | (tmp & 0x3F); + fst <<= 1; + + if (!(fst & 0x80)) { + d &= bitMask[i]; + + // overlong, could have been encoded with i bytes + if ((d & ~bitMask[i - 1]) == 0) { + throw std::runtime_error( + to("folly::decodeUtf8 i=", i, " d=", d)); + } + + // check for surrogates only needed for 3 bytes + if (i == 2) { + if ((d >= 0xD800 && d <= 0xDFFF) || d > 0x10FFFF) { + throw std::runtime_error( + to("folly::decodeUtf8 i=", i, " d=", d)); + } + } + + p += i + 1; + return d; + } + } + + throw std::runtime_error("folly::decodeUtf8 encoding length maxed out"); +} + +// Escape a string so that it is legal to print it in JSON text. +void escapeString(StringPiece input, + fbstring& out, + const serialization_opts& opts) { + auto hexDigit = [] (int c) -> char { + return c < 10 ? c + '0' : c - 10 + 'a'; + }; + + out.reserve(out.size() + input.size() + 2); + out.push_back('\"'); + + const char* p = input.begin(); + const char* q = input.begin(); + const char* const e = input.end(); + + while (p < e) { + // Since non-ascii encoding inherently does utf8 validation + // we explicitly validate utf8 only if non-ascii encoding is disabled. 
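+    //
+    // Worked example for the encode_non_ascii branch below: the two-byte
+    // UTF-8 sequence 0xC3 0xA9 ("é") decodes to U+00E9 and is emitted as
+    // the six ASCII characters \u00e9.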
+
+// Escape a string so that it is legal to print it in JSON text.
+void escapeString(StringPiece input,
+                  fbstring& out,
+                  const serialization_opts& opts) {
+  auto hexDigit = [] (int c) -> char {
+    return c < 10 ? c + '0' : c - 10 + 'a';
+  };
+
+  out.reserve(out.size() + input.size() + 2);
+  out.push_back('\"');
+
+  const char* p = input.begin();
+  const char* q = input.begin();
+  const char* const e = input.end();
+
+  while (p < e) {
+    // Since non-ascii encoding inherently does utf8 validation
+    // we explicitly validate utf8 only if non-ascii encoding is disabled.
+    if (opts.validate_utf8 && !opts.encode_non_ascii) {
+      // to achieve better spatial and temporal coherence
+      // we do utf8 validation progressively along with the
+      // string-escaping instead of two separate passes
+
+      // as the encoding progresses, q will stay at or ahead of p
+      CHECK(q >= p);
+
+      // as p catches up with q, move q forward
+      if (q == p) {
+        // calling decodeUtf8 has the side effect of
+        // checking that utf8 encodings are valid
+        decodeUtf8(q, e);
+      }
+    }
+
+    if (opts.encode_non_ascii && (*p & 0x80)) {
+      char32_t v = decodeUtf8(p, e);
+      out.append("\\u");
+      out.push_back(hexDigit(v >> 12));
+      out.push_back(hexDigit((v >> 8) & 0x0f));
+      out.push_back(hexDigit((v >> 4) & 0x0f));
+      out.push_back(hexDigit(v & 0x0f));
+      continue;
+    }
+    if (*p == '\\' || *p == '\"') {
+      out.push_back('\\');
+      out.push_back(*p++);
+      continue;
+    }
+    if (*p <= 0x1f) {
+      // note that this if condition captures both control characters
+      // and extended ascii characters
+      out.append("\\u00");
+      out.push_back(hexDigit((*p & 0xf0) >> 4));
+      out.push_back(hexDigit(*p & 0xf));
+      p++;
+      continue;
+    }
+    out.push_back(*p++);
+  }
+
+  out.push_back('\"');
+}
+
+struct Printer {
+  explicit Printer(fbstring& out,
+                   unsigned* indentLevel,
+                   serialization_opts const* opts)
+    : out_(out)
+    , indentLevel_(indentLevel)
+    , opts_(*opts)
+  {}
+
+  void operator()(dynamic const& v) const {
+    switch (v.type()) {
+    case dynamic::DOUBLE:
+      toAppend(v.asDouble(), &out_);
+      break;
+    case dynamic::INT64: {
+      auto intval = v.asInt();
+      if (opts_.javascript_safe) {
+        // Use folly::to to check that this integer can be represented
+        // as a double without loss of precision.
+        intval = int64_t(to<double>(intval));
+      }
+      toAppend(intval, &out_);
+      break;
+    }
+    case dynamic::BOOL:
+      out_ += v.asBool() ? "true" : "false";
+      break;
+    case dynamic::NULLT:
+      out_ += "null";
+      break;
+    case dynamic::STRING:
+      escapeString(v.asString(), out_, opts_);
+      break;
+    case dynamic::OBJECT:
+      printObject(v);
+      break;
+    case dynamic::ARRAY:
+      printArray(v);
+      break;
+    default:
+      CHECK(0) << "Bad type " << v.type();
+    }
+  }
+
+private:
+  void printKV(const std::pair<const dynamic, dynamic>& p) const {
+    if (!opts_.allow_non_string_keys && !p.first.isString()) {
+      throw std::runtime_error("folly::toJson: JSON object key was not a "
+        "string");
+    }
+    (*this)(p.first);
+    mapColon();
+    (*this)(p.second);
+  }
+
+  void printObject(dynamic const& o) const {
+    if (o.empty()) {
+      out_ += "{}";
+      return;
+    }
+
+    out_ += '{';
+    indent();
+    newline();
+    auto it = o.items().begin();
+    printKV(*it);
+    for (++it; it != o.items().end(); ++it) {
+      out_ += ',';
+      newline();
+      printKV(*it);
+    }
+    outdent();
+    newline();
+    out_ += '}';
+  }
+
+  void printArray(dynamic const& a) const {
+    if (a.empty()) {
+      out_ += "[]";
+      return;
+    }
+
+    out_ += '[';
+    indent();
+    newline();
+    (*this)(a[0]);
+    for (auto& val : makeRange(boost::next(a.begin()), a.end())) {
+      out_ += ',';
+      newline();
+      (*this)(val);
+    }
+    outdent();
+    newline();
+    out_ += ']';
+  }
+
+private:
+  void outdent() const {
+    if (indentLevel_) {
+      --*indentLevel_;
+    }
+  }
+
+  void indent() const {
+    if (indentLevel_) {
+      ++*indentLevel_;
+    }
+  }
+
+  void newline() const {
+    if (indentLevel_) {
+      out_ += to<fbstring>('\n', fbstring(*indentLevel_ * 2, ' '));
+    }
+  }
+
+  void mapColon() const {
+    out_ += indentLevel_ ? " : " : ":";
+  }
+
+private:
+  fbstring& out_;
+  unsigned* const indentLevel_;
+  serialization_opts const& opts_;
+};
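+
+// Illustrative example (values hypothetical, object key order is not
+// guaranteed): with pretty_formatting enabled the printer above turns
+//
+//   dynamic d = dynamic::object("a", 1)("b", { 2, 3 });
+//
+// into roughly
+//
+//   {
+//     "a" : 1,
+//     "b" : [
+//       2,
+//       3
+//     ]
+//   }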
" : " : ":"; + } + +private: + fbstring& out_; + unsigned* const indentLevel_; + serialization_opts const& opts_; +}; + +////////////////////////////////////////////////////////////////////// + +struct ParseError : std::runtime_error { + explicit ParseError(int line) + : std::runtime_error(to("json parse error on line ", line)) + {} + + explicit ParseError(int line, std::string const& context, + std::string const& expected) + : std::runtime_error(to("json parse error on line ", line, + !context.empty() ? to(" near `", context, '\'') + : "", + ": ", expected)) + {} + + explicit ParseError(std::string const& what) + : std::runtime_error("json parse error: " + what) + {} +}; + +// Wraps our input buffer with some helper functions. +struct Input { + explicit Input(StringPiece range) + : range_(range) + , lineNum_(0) + { + storeCurrent(); + } + + Input(Input const&) = delete; + Input& operator=(Input const&) = delete; + + char const* begin() const { return range_.begin(); } + + // Parse ahead for as long as the supplied predicate is satisfied, + // returning a range of what was skipped. + template + StringPiece skipWhile(const Predicate& p) { + std::size_t skipped = 0; + for (; skipped < range_.size(); ++skipped) { + if (!p(range_[skipped])) { + break; + } + if (range_[skipped] == '\n') { + ++lineNum_; + } + } + auto ret = range_.subpiece(0, skipped); + range_.advance(skipped); + storeCurrent(); + return ret; + } + + StringPiece skipDigits() { + return skipWhile([] (char c) { return c >= '0' && c <= '9'; }); + } + + void skipWhitespace() { + // Spaces other than ' ' characters are less common but should be + // checked. This configuration where we loop on the ' ' + // separately from oddspaces was empirically fastest. + auto oddspace = [] (char c) { + return c == '\n' || c == '\t' || c == '\r'; + }; + + loop: + for (; !range_.empty() && range_.front() == ' '; range_.pop_front()) { + } + if (!range_.empty() && oddspace(range_.front())) { + range_.pop_front(); + goto loop; + } + storeCurrent(); + } + + void expect(char c) { + if (**this != c) { + throw ParseError(lineNum_, context(), + to("expected '", c, '\'')); + } + ++*this; + } + + std::size_t size() const { + return range_.size(); + } + + int operator*() const { + return current_; + } + + void operator++() { + range_.pop_front(); + storeCurrent(); + } + + template + T extract() { + try { + return to(&range_); + } catch (std::exception const& e) { + error(e.what()); + } + } + + bool consume(StringPiece str) { + if (boost::starts_with(range_, str)) { + range_.advance(str.size()); + storeCurrent(); + return true; + } + return false; + } + + std::string context() const { + return range_.subpiece(0, 16 /* arbitrary */).toString(); + } + + dynamic error(char const* what) const { + throw ParseError(lineNum_, context(), what); + } + +private: + void storeCurrent() { + current_ = range_.empty() ? 
+
+dynamic parseValue(Input& in);
+fbstring parseString(Input& in);
+
+dynamic parseObject(Input& in) {
+  assert(*in == '{');
+  ++in;
+
+  dynamic ret = dynamic::object;
+
+  in.skipWhitespace();
+  if (*in == '}') {
+    ++in;
+    return ret;
+  }
+
+  for (;;) {
+    if (*in != '\"') {
+      in.error("expected string for object key name");
+    }
+    auto key = parseString(in);
+    in.skipWhitespace();
+    in.expect(':');
+    in.skipWhitespace();
+    ret.insert(std::move(key), parseValue(in));
+    in.skipWhitespace();
+    if (*in != ',') {
+      break;
+    }
+    ++in;
+    in.skipWhitespace();
+  }
+  in.expect('}');
+
+  return ret;
+}
+
+dynamic parseArray(Input& in) {
+  assert(*in == '[');
+  ++in;
+
+  dynamic ret = {};
+
+  in.skipWhitespace();
+  if (*in == ']') {
+    ++in;
+    return ret;
+  }
+
+  for (;;) {
+    ret.push_back(parseValue(in));
+    in.skipWhitespace();
+    if (*in != ',') {
+      break;
+    }
+    ++in;
+    in.skipWhitespace();
+  }
+  in.expect(']');
+
+  return ret;
+}
+
+dynamic parseNumber(Input& in) {
+  bool const negative = (*in == '-');
+  if (negative) {
+    ++in;
+    if (in.consume("Infinity")) {
+      return -std::numeric_limits<double>::infinity();
+    }
+  }
+
+  auto integral = in.skipDigits();
+  if (integral.empty()) {
+    in.error("expected digits after `-'");
+  }
+  auto const wasE = *in == 'e' || *in == 'E';
+  if (*in != '.' && !wasE) {
+    auto val = to<int64_t>(integral);
+    if (negative) {
+      val = -val;
+    }
+    in.skipWhitespace();
+    return val;
+  }
+
+  auto end = !wasE ? (++in, in.skipDigits().end()) : in.begin();
+  if (*in == 'e' || *in == 'E') {
+    ++in;
+    if (*in == '+' || *in == '-') {
+      ++in;
+    }
+    auto expPart = in.skipDigits();
+    end = expPart.end();
+  }
+  auto fullNum = makeRange(integral.begin(), end);
+
+  auto val = to<double>(fullNum);
+  if (negative) {
+    val *= -1;
+  }
+  return val;
+}
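+
+// Illustrative examples of the dispatch above: "42" and "-7" come back
+// as INT64 dynamics, while "1.5", "1e3", and "-Infinity" come back as
+// DOUBLE dynamics (the last only because this parser accepts the
+// non-standard Infinity literal).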
+
+fbstring decodeUnicodeEscape(Input& in) {
+  auto hexVal = [&] (char c) -> unsigned {
+    return c >= '0' && c <= '9' ? c - '0' :
+           c >= 'a' && c <= 'f' ? c - 'a' + 10 :
+           c >= 'A' && c <= 'F' ? c - 'A' + 10 :
+           (in.error("invalid hex digit"), 0);
+  };
+
+  auto readHex = [&] {
+    if (in.size() < 4) {
+      in.error("expected 4 hex digits");
+    }
+
+    uint16_t ret = hexVal(*in) * 4096;
+    ++in;
+    ret += hexVal(*in) * 256;
+    ++in;
+    ret += hexVal(*in) * 16;
+    ++in;
+    ret += hexVal(*in);
+    ++in;
+    return ret;
+  };
+
+  /*
+   * If the value encoded is in the surrogate pair range, we need to
+   * make sure there is another escape that we can use also.
+   */
+  uint32_t codePoint = readHex();
+  if (codePoint >= 0xd800 && codePoint <= 0xdbff) {
+    if (!in.consume("\\u")) {
+      in.error("expected another unicode escape for second half of "
+        "surrogate pair");
+    }
+    uint16_t second = readHex();
+    if (second >= 0xdc00 && second <= 0xdfff) {
+      codePoint = 0x10000 + ((codePoint & 0x3ff) << 10) +
+                  (second & 0x3ff);
+    } else {
+      in.error("second character in surrogate pair is invalid");
+    }
+  } else if (codePoint >= 0xdc00 && codePoint <= 0xdfff) {
+    in.error("invalid unicode code point (in range [0xdc00,0xdfff])");
+  }
+
+  return codePointToUtf8(codePoint);
+}
+
+fbstring parseString(Input& in) {
+  assert(*in == '\"');
+  ++in;
+
+  fbstring ret;
+  for (;;) {
+    auto range = in.skipWhile(
+      [] (char c) { return c != '\"' && c != '\\'; }
+    );
+    ret.append(range.begin(), range.end());
+
+    if (*in == '\"') {
+      ++in;
+      break;
+    }
+    if (*in == '\\') {
+      ++in;
+      switch (*in) {
+      case '\"':    ret.push_back('\"'); ++in; break;
+      case '\\':    ret.push_back('\\'); ++in; break;
+      case '/':     ret.push_back('/');  ++in; break;
+      case 'b':     ret.push_back('\b'); ++in; break;
+      case 'f':     ret.push_back('\f'); ++in; break;
+      case 'n':     ret.push_back('\n'); ++in; break;
+      case 'r':     ret.push_back('\r'); ++in; break;
+      case 't':     ret.push_back('\t'); ++in; break;
+      case 'u':     ++in; ret += decodeUnicodeEscape(in); break;
+      default:      in.error(to<std::string>("unknown escape ", *in,
+                                             " in string").c_str());
+      }
+      continue;
+    }
+    if (*in == EOF) {
+      in.error("unterminated string");
+    }
+    if (!*in) {
+      /*
+       * Apparently we're actually supposed to ban all control
+       * characters from strings.  This seems unnecessarily
+       * restrictive, so we're only banning zero bytes.  (Since the
+       * string is presumed to be UTF-8 encoded it's fine to just
+       * check this way.)
+       */
+      in.error("null byte in string");
+    }
+
+    ret.push_back(*in);
+    ++in;
+  }
+
+  return ret;
+}
+
+dynamic parseValue(Input& in) {
+  in.skipWhitespace();
+  return *in == '[' ? parseArray(in) :
+         *in == '{' ? parseObject(in) :
+         *in == '\"' ? parseString(in) :
+         (*in == '-' || (*in >= '0' && *in <= '9')) ? parseNumber(in) :
+         in.consume("true") ? true :
+         in.consume("false") ? false :
+         in.consume("null") ? nullptr :
+         in.consume("Infinity") ? std::numeric_limits<double>::infinity() :
+         in.consume("NaN") ? std::numeric_limits<double>::quiet_NaN() :
+         in.error("expected json value");
+}
+
+}
+
+//////////////////////////////////////////////////////////////////////
+
+fbstring serialize(dynamic const& dyn, serialization_opts const& opts) {
+  fbstring ret;
+  unsigned indentLevel = 0;
+  Printer p(ret, opts.pretty_formatting ? &indentLevel : nullptr, &opts);
+  p(dyn);
+  return ret;
+}
+
+}
+
+//////////////////////////////////////////////////////////////////////
+
+dynamic parseJson(StringPiece range) {
+  json::Input in(range);
+
+  auto ret = parseValue(in);
+  in.skipWhitespace();
+  if (*in != '\0' && in.size()) {
+    in.error("parsing didn't consume all input");
+  }
+  return ret;
+}
+
+fbstring toJson(dynamic const& dyn) {
+  return json::serialize(dyn, json::serialization_opts());
+}
+
+fbstring toPrettyJson(dynamic const& dyn) {
+  json::serialization_opts opts;
+  opts.pretty_formatting = true;
+  return json::serialize(dyn, opts);
+}
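+
+// Illustrative round trip (values hypothetical):
+//
+//   dynamic d = parseJson("{\"pi\": 3.14, \"ok\": true}");
+//   fbstring s = toJson(d);        // compact; key order not guaranteed
+//   fbstring p = toPrettyJson(d);  // two-space indent, " : " separators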
+
+//////////////////////////////////////////////////////////////////////
+// dynamic::print_as_pseudo_json() is implemented here for header
+// ordering reasons (most of the dynamic implementation is in
+// dynamic-inl.h, which we don't want to include in json.h).
+
+void dynamic::print_as_pseudo_json(std::ostream& out) const {
+  json::serialization_opts opts;
+  opts.allow_non_string_keys = true;
+  out << json::serialize(*this, opts);
+}
+
+//////////////////////////////////////////////////////////////////////
+
+}
diff --git a/folly/json.h b/folly/json.h
new file mode 100644
index 00000000..87d00662
--- /dev/null
+++ b/folly/json.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ * Serialize and deserialize folly::dynamic values as JSON.
+ *
+ * Before you use this you should probably understand the basic
+ * concepts in the JSON type system:
+ *
+ *    Value  : String | Bool | Null | Object | Array | Number
+ *    String : UTF-8 sequence
+ *    Object : (String, Value) pairs, with unique String keys
+ *    Array  : ordered list of Values
+ *    Null   : null
+ *    Bool   : true | false
+ *    Number : (representation unspecified)
+ *
+ * ... That's about it.  For more information see http://json.org or
+ * look up RFC 4627.
+ *
+ * If your dynamic has anything illegal with regard to this type
+ * system, the serializer will throw.
+ *
+ * @author Jordan DeLong
+ */
+
+#ifndef FOLLY_JSON_H_
+#define FOLLY_JSON_H_
+
+#include "folly/dynamic.h"
+#include "folly/FBString.h"
+#include "folly/Range.h"
+
+namespace folly {
+
+//////////////////////////////////////////////////////////////////////
+
+namespace json {
+
+  struct serialization_opts {
+    explicit serialization_opts()
+      : allow_non_string_keys(false)
+      , javascript_safe(false)
+      , pretty_formatting(false)
+      , encode_non_ascii(false)
+      , validate_utf8(false)
+    {}
+
+    // If true, keys in an object can be non-strings.  (In strict
+    // JSON, object keys must be strings.)  This is used by dynamic's
+    // operator<<.
+    bool allow_non_string_keys;
+
+    /*
+     * If true, refuse to serialize 64-bit numbers that cannot be
+     * precisely represented as a double---instead, throw an
+     * exception if the document contains one.
+     */
+    bool javascript_safe;
+
+    // If true, the serialized json will contain spaces and newlines to
+    // try to be minimally "pretty".
+    bool pretty_formatting;
+
+    // If true, non-ASCII utf8 characters will be encoded as \uXXXX.
+    bool encode_non_ascii;
+
+    // Check that strings are valid utf8.
+    bool validate_utf8;
+  };
+
+  /*
+   * Main JSON serialization routine taking folly::dynamic parameters.
+   * For the most common use cases there are simpler functions in the
+   * main folly namespace below.
+   */
+  fbstring serialize(dynamic const&, serialization_opts const&);
+
+}
+
+//////////////////////////////////////////////////////////////////////
+
+/*
+ * Parse a json blob out of a range and produce a dynamic representing
+ * it.
+ */
+dynamic parseJson(StringPiece);
+
+/*
+ * Serialize a dynamic into a json string.
+ */
+fbstring toJson(dynamic const&);
+
+/*
+ * Same as the above, except format the json with some minimal
+ * indentation.
+ */
+fbstring toPrettyJson(dynamic const&);
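+
+/*
+ * Illustrative sketch of the options above (names hypothetical):
+ *
+ *   folly::json::serialization_opts opts;
+ *   opts.encode_non_ascii = true;      // escape non-ASCII as \uXXXX
+ *   opts.javascript_safe = true;       // throw on imprecise int64s
+ *   fbstring s = folly::json::serialize(dyn, opts);
+ */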
+
+//////////////////////////////////////////////////////////////////////
+
+}
+
+#endif
diff --git a/folly/m4/ac_cxx_compile_stdcxx_0x.m4 b/folly/m4/ac_cxx_compile_stdcxx_0x.m4
new file mode 100644
index 00000000..5c1a3bd0
--- /dev/null
+++ b/folly/m4/ac_cxx_compile_stdcxx_0x.m4
@@ -0,0 +1,110 @@
+# ===========================================================================
+#       http://autoconf-archive.cryp.to/ac_cxx_compile_stdcxx_0x.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AC_CXX_COMPILE_STDCXX_0X
+#
+# DESCRIPTION
+#
+#   Check for baseline language coverage in the compiler for the C++0x
+#   standard.
+#
+# LAST MODIFICATION
+#
+#   2008-04-17
+#
+# COPYLEFT
+#
+#   Copyright (c) 2008 Benjamin Kosnik
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved.
+
+AC_DEFUN([AC_CXX_COMPILE_STDCXX_0X], [
+  AC_CACHE_CHECK(if g++ supports C++0x features without additional flags,
+  ac_cv_cxx_compile_cxx0x_native,
+  [AC_LANG_SAVE
+  AC_LANG_CPLUSPLUS
+  AC_TRY_COMPILE([
+  template <typename T>
+    struct check
+    {
+      static_assert(sizeof(int) <= sizeof(T), "not big enough");
+    };
+
+    typedef check<check<bool>> right_angle_brackets;
+
+    int a;
+    decltype(a) b;
+
+    typedef check<int> check_type;
+    check_type c;
+    check_type&& cr = static_cast<check_type&&>(c);],,
+  ac_cv_cxx_compile_cxx0x_native=yes, ac_cv_cxx_compile_cxx0x_native=no)
+  AC_LANG_RESTORE
+  ])
+
+  AC_CACHE_CHECK(if g++ supports C++0x features with -std=c++0x,
+  ac_cv_cxx_compile_cxx0x_cxx,
+  [AC_LANG_SAVE
+  AC_LANG_CPLUSPLUS
+  ac_save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -std=c++0x"
+  AC_TRY_COMPILE([
+  template <typename T>
+    struct check
+    {
+      static_assert(sizeof(int) <= sizeof(T), "not big enough");
+    };
+
+    typedef check<check<bool>> right_angle_brackets;
+
+    int a;
+    decltype(a) b;
+
+    typedef check<int> check_type;
+    check_type c;
+    check_type&& cr = static_cast<check_type&&>(c);],,
+  ac_cv_cxx_compile_cxx0x_cxx=yes, ac_cv_cxx_compile_cxx0x_cxx=no)
+  CXXFLAGS="$ac_save_CXXFLAGS"
+  AC_LANG_RESTORE
+  ])
+
+  AC_CACHE_CHECK(if g++ supports C++0x features with -std=gnu++0x,
+  ac_cv_cxx_compile_cxx0x_gxx,
+  [AC_LANG_SAVE
+  AC_LANG_CPLUSPLUS
+  ac_save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -std=gnu++0x"
+  AC_TRY_COMPILE([
+  template <typename T>
+    struct check
+    {
+      static_assert(sizeof(int) <= sizeof(T), "not big enough");
+    };
+
+    typedef check<check<bool>> right_angle_brackets;
+
+    int a;
+    decltype(a) b;
+
+    typedef check<int> check_type;
+    check_type c;
+    check_type&& cr = static_cast<check_type&&>(c);],,
+  ac_cv_cxx_compile_cxx0x_gxx=yes, ac_cv_cxx_compile_cxx0x_gxx=no)
+  CXXFLAGS="$ac_save_CXXFLAGS"
+  AC_LANG_RESTORE
+  ])
+
+  if test "$ac_cv_cxx_compile_cxx0x_native" = yes ||
+     test "$ac_cv_cxx_compile_cxx0x_cxx" = yes ||
+     test "$ac_cv_cxx_compile_cxx0x_gxx" = yes; then
+    AC_DEFINE(HAVE_STDCXX_0X,,[Define if g++ supports C++0x features.
]) + else + AC_MSG_ERROR([Could not find cxx0x support in g++]) + fi +]) + \ No newline at end of file diff --git a/folly/m4/ax_boost_base.m4 b/folly/m4/ax_boost_base.m4 new file mode 100644 index 00000000..35077885 --- /dev/null +++ b/folly/m4/ax_boost_base.m4 @@ -0,0 +1,258 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_boost_base.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +# +# DESCRIPTION +# +# Test for the Boost C++ libraries of a particular version (or newer) +# +# If no path to the installed boost library is given the macro searchs +# under /usr, /usr/local, /opt and /opt/local and evaluates the +# $BOOST_ROOT environment variable. Further documentation is available at +# . +# +# This macro calls: +# +# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) +# +# And sets: +# +# HAVE_BOOST +# +# LICENSE +# +# Copyright (c) 2008 Thomas Porschberg +# Copyright (c) 2009 Peter Adolphs +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 20 + +AC_DEFUN([AX_BOOST_BASE], +[ +AC_ARG_WITH([boost], + [AS_HELP_STRING([--with-boost@<:@=ARG@:>@], + [use Boost library from a standard location (ARG=yes), + from the specified location (ARG=), + or disable it (ARG=no) + @<:@ARG=yes@:>@ ])], + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ac_boost_path="" + else + want_boost="yes" + ac_boost_path="$withval" + fi + ], + [want_boost="yes"]) + + +AC_ARG_WITH([boost-libdir], + AS_HELP_STRING([--with-boost-libdir=LIB_DIR], + [Force given directory for boost libraries. Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]), + [ + if test -d "$withval" + then + ac_boost_lib_path="$withval" + else + AC_MSG_ERROR(--with-boost-libdir expected directory name) + fi + ], + [ac_boost_lib_path=""] +) + +if test "x$want_boost" = "xyes"; then + boost_lib_version_req=ifelse([$1], ,1.20.0,$1) + boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'` + boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'` + boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'` + boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` + if test "x$boost_lib_version_req_sub_minor" = "x" ; then + boost_lib_version_req_sub_minor="0" + fi + WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor` + AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req) + succeeded=no + + dnl On 64-bit systems check for system libraries in both lib64 and lib. + dnl The former is specified by FHS, but e.g. Debian does not adhere to + dnl this (as it rises problems for generic multi-arch support). + dnl The last entry in the list is chosen by default when no libraries + dnl are found, e.g. when only header-only libraries are installed! 
+ libsubdirs="lib" + ax_arch=`uname -m` + if test $ax_arch = x86_64 -o $ax_arch = ppc64 -o $ax_arch = s390x -o $ax_arch = sparc64; then + libsubdirs="lib64 lib lib64" + fi + + dnl first we check the system location for boost libraries + dnl this location ist chosen if boost libraries are installed with the --layout=system option + dnl or if you install boost with RPM + if test "$ac_boost_path" != ""; then + BOOST_CPPFLAGS="-I$ac_boost_path/include" + for ac_boost_path_tmp in $libsubdirs; do + if test -d "$ac_boost_path"/"$ac_boost_path_tmp" ; then + BOOST_LDFLAGS="-L$ac_boost_path/$ac_boost_path_tmp" + break + fi + done + elif test "$cross_compiling" != yes; then + for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then + for libsubdir in $libsubdirs ; do + if ls "$ac_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi + done + BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir" + BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" + break; + fi + done + fi + + dnl overwrite ld flags if we have required special directory with + dnl --with-boost-libdir parameter + if test "$ac_boost_lib_path" != ""; then + BOOST_LDFLAGS="-L$ac_boost_lib_path" + fi + + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_REQUIRE([AC_PROG_CXX]) + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + @%:@include + ]], [[ + #if BOOST_VERSION >= $WANT_BOOST_VERSION + // Everything is okay + #else + # error Boost version is too old + #endif + ]])],[ + AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ],[ + ]) + AC_LANG_POP([C++]) + + + + dnl if we found no boost with system layout we search for boost libraries + dnl built and installed without the --layout=system option or for a staged(not installed) version + if test "x$succeeded" != "xyes"; then + _version=0 + if test "$ac_boost_path" != ""; then + if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then + for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + fi + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE" + done + fi + else + if test "$cross_compiling" != yes; then + for ac_boost_path in /usr /usr/local /opt /opt/local ; do + if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then + for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "$V_CHECK" = "1" ; then + _version=$_version_tmp + best_path=$ac_boost_path + fi + done + fi + done + + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" + if test "$ac_boost_lib_path" = ""; then + for libsubdir in $libsubdirs ; do + if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi + done + BOOST_LDFLAGS="-L$best_path/$libsubdir" + fi + fi + + if test "x$BOOST_ROOT" != "x"; then + for libsubdir in $libsubdirs ; do + if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi + done + if test -d "$BOOST_ROOT" && test -r 
"$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r "$BOOST_ROOT/stage/$libsubdir"; then + version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` + stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` + stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` + V_CHECK=`expr $stage_version_shorten \>\= $_version` + if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then + AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) + BOOST_CPPFLAGS="-I$BOOST_ROOT" + BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir" + fi + fi + fi + fi + + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + @%:@include + ]], [[ + #if BOOST_VERSION >= $WANT_BOOST_VERSION + // Everything is okay + #else + # error Boost version is too old + #endif + ]])],[ + AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ],[ + ]) + AC_LANG_POP([C++]) + fi + + if test "$succeeded" != "yes" ; then + if test "$_version" = "0" ; then + AC_MSG_NOTICE([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . See http://randspringer.de/boost for more documentation.]]) + else + AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).]) + fi + # execute ACTION-IF-NOT-FOUND (if present): + ifelse([$3], , :, [$3]) + else + AC_SUBST(BOOST_CPPFLAGS) + AC_SUBST(BOOST_LDFLAGS) + AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available]) + # execute ACTION-IF-FOUND (if present): + ifelse([$2], , :, [$2]) + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" +fi + +]) \ No newline at end of file diff --git a/folly/m4/ax_boost_regex.m4 b/folly/m4/ax_boost_regex.m4 new file mode 100644 index 00000000..f3e5cc18 --- /dev/null +++ b/folly/m4/ax_boost_regex.m4 @@ -0,0 +1,111 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_boost_regex.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_REGEX +# +# DESCRIPTION +# +# Test for Regex library from the Boost C++ libraries. The macro requires +# a preceding call to AX_BOOST_BASE. Further documentation is available at +# . +# +# This macro calls: +# +# AC_SUBST(BOOST_REGEX_LIB) +# +# And sets: +# +# HAVE_BOOST_REGEX +# +# LICENSE +# +# Copyright (c) 2008 Thomas Porschberg +# Copyright (c) 2008 Michael Tindal +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 20 + +AC_DEFUN([AX_BOOST_REGEX], +[ + AC_ARG_WITH([boost-regex], + AS_HELP_STRING([--with-boost-regex@<:@=special-lib@:>@], + [use the Regex library from boost - it is possible to specify a certain library for the linker + e.g. 
--with-boost-regex=boost_regex-gcc-mt-d-1_33_1 ]), + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ax_boost_user_regex_lib="" + else + want_boost="yes" + ax_boost_user_regex_lib="$withval" + fi + ], + [want_boost="yes"] + ) + + if test "x$want_boost" = "xyes"; then + AC_REQUIRE([AC_PROG_CC]) + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_CACHE_CHECK(whether the Boost::Regex library is available, + ax_cv_boost_regex, + [AC_LANG_PUSH([C++]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include + ]], + [[boost::regex r(); return 0;]])], + ax_cv_boost_regex=yes, ax_cv_boost_regex=no) + AC_LANG_POP([C++]) + ]) + if test "x$ax_cv_boost_regex" = "xyes"; then + AC_DEFINE(HAVE_BOOST_REGEX,,[define if the Boost::Regex library is available]) + BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` + if test "x$ax_boost_user_regex_lib" = "x"; then + for libextension in `ls $BOOSTLIBDIR/libboost_regex*.so* $BOOSTLIBDIR/libboost_regex*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_regex.*\)\.so.*$;\1;' -e 's;^lib\(boost_regex.*\)\.a*$;\1;'` ; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], + [link_regex="no"]) + done + if test "x$link_regex" != "xyes"; then + for libextension in `ls $BOOSTLIBDIR/boost_regex*.{dll,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_regex.*\)\.dll.*$;\1;' -e 's;^\(boost_regex.*\)\.a*$;\1;'` ; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], + [link_regex="no"]) + done + fi + + else + for ax_lib in $ax_boost_user_regex_lib boost_regex-$ax_boost_user_regex_lib; do + AC_CHECK_LIB($ax_lib, main, + [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], + [link_regex="no"]) + done + fi + if test "x$ax_lib" = "x"; then + AC_MSG_ERROR(Could not find a version of the Boost::Regex library!) + fi + if test "x$link_regex" != "xyes"; then + AC_MSG_ERROR(Could not link against $ax_lib !) + fi + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + fi +]) \ No newline at end of file diff --git a/folly/m4/ax_boost_thread.m4 b/folly/m4/ax_boost_thread.m4 new file mode 100644 index 00000000..971f7ab8 --- /dev/null +++ b/folly/m4/ax_boost_thread.m4 @@ -0,0 +1,149 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_boost_thread.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_THREAD +# +# DESCRIPTION +# +# Test for Thread library from the Boost C++ libraries. The macro requires +# a preceding call to AX_BOOST_BASE. Further documentation is available at +# . +# +# This macro calls: +# +# AC_SUBST(BOOST_THREAD_LIB) +# +# And sets: +# +# HAVE_BOOST_THREAD +# +# LICENSE +# +# Copyright (c) 2009 Thomas Porschberg +# Copyright (c) 2009 Michael Tindal +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 23 + +AC_DEFUN([AX_BOOST_THREAD], +[ + AC_ARG_WITH([boost-thread], + AS_HELP_STRING([--with-boost-thread@<:@=special-lib@:>@], + [use the Thread library from boost - it is possible to specify a certain library for the linker + e.g. --with-boost-thread=boost_thread-gcc-mt ]), + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ax_boost_user_thread_lib="" + else + want_boost="yes" + ax_boost_user_thread_lib="$withval" + fi + ], + [want_boost="yes"] + ) + + if test "x$want_boost" = "xyes"; then + AC_REQUIRE([AC_PROG_CC]) + AC_REQUIRE([AC_CANONICAL_BUILD]) + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_CACHE_CHECK(whether the Boost::Thread library is available, + ax_cv_boost_thread, + [AC_LANG_PUSH([C++]) + CXXFLAGS_SAVE=$CXXFLAGS + + if test "x$host_os" = "xsolaris" ; then + CXXFLAGS="-pthreads $CXXFLAGS" + elif test "x$host_os" = "xmingw32" ; then + CXXFLAGS="-mthreads $CXXFLAGS" + else + CXXFLAGS="-pthread $CXXFLAGS" + fi + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], + [[boost::thread_group thrds; + return 0;]])], + ax_cv_boost_thread=yes, ax_cv_boost_thread=no) + CXXFLAGS=$CXXFLAGS_SAVE + AC_LANG_POP([C++]) + ]) + if test "x$ax_cv_boost_thread" = "xyes"; then + if test "x$host_os" = "xsolaris" ; then + BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS" + elif test "x$host_os" = "xmingw32" ; then + BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS" + else + BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS" + fi + + AC_SUBST(BOOST_CPPFLAGS) + + AC_DEFINE(HAVE_BOOST_THREAD,,[define if the Boost::Thread library is available]) + BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` + + LDFLAGS_SAVE=$LDFLAGS + case "x$host_os" in + *bsd* ) + LDFLAGS="-pthread $LDFLAGS" + break; + ;; + esac + if test "x$ax_boost_user_thread_lib" = "x"; then + for libextension in `ls $BOOSTLIBDIR/libboost_thread*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.a*$;\1;'`; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], + [link_thread="no"]) + done + if test "x$link_thread" != "xyes"; then + for libextension in `ls $BOOSTLIBDIR/boost_thread*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.a*$;\1;'` ; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], + [link_thread="no"]) + done + fi + + else + for ax_lib in $ax_boost_user_thread_lib boost_thread-$ax_boost_user_thread_lib; do + AC_CHECK_LIB($ax_lib, exit, + [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], + [link_thread="no"]) + done + + fi + if test "x$ax_lib" = "x"; then + AC_MSG_ERROR(Could not find a version of the library!) + fi + if test "x$link_thread" = "xno"; then + AC_MSG_ERROR(Could not link against $ax_lib !) 
+ else + case "x$host_os" in + *bsd* ) + BOOST_LDFLAGS="-pthread $BOOST_LDFLAGS" + break; + ;; + esac + + fi + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + fi +]) \ No newline at end of file diff --git a/folly/m4/ax_prefix_config.m4 b/folly/m4/ax_prefix_config.m4 new file mode 100644 index 00000000..c40d2df0 --- /dev/null +++ b/folly/m4/ax_prefix_config.m4 @@ -0,0 +1,209 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_prefix_config_h.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PREFIX_CONFIG_H [(OUTPUT-HEADER [,PREFIX [,ORIG-HEADER]])] +# +# DESCRIPTION +# +# This is a new variant from ac_prefix_config_ this one will use a +# lowercase-prefix if the config-define was starting with a +# lowercase-char, e.g. "#define const", "#define restrict", or "#define +# off_t", (and this one can live in another directory, e.g. +# testpkg/config.h therefore I decided to move the output-header to be the +# first arg) +# +# takes the usual config.h generated header file; looks for each of the +# generated "#define SOMEDEF" lines, and prefixes the defined name (ie. +# makes it "#define PREFIX_SOMEDEF". The result is written to the output +# config.header file. The PREFIX is converted to uppercase for the +# conversions. +# +# Defaults: +# +# OUTPUT-HEADER = $PACKAGE-config.h +# PREFIX = $PACKAGE +# ORIG-HEADER, from AM_CONFIG_HEADER(config.h) +# +# Your configure.ac script should contain both macros in this order, and +# unlike the earlier variations of this prefix-macro it is okay to place +# the AX_PREFIX_CONFIG_H call before the AC_OUTPUT invokation. +# +# Example: +# +# AC_INIT(config.h.in) # config.h.in as created by "autoheader" +# AM_INIT_AUTOMAKE(testpkg, 0.1.1) # makes #undef VERSION and PACKAGE +# AM_CONFIG_HEADER(config.h) # prep config.h from config.h.in +# AX_PREFIX_CONFIG_H(mylib/_config.h) # prep mylib/_config.h from it.. +# AC_MEMORY_H # makes "#undef NEED_MEMORY_H" +# AC_C_CONST_H # makes "#undef const" +# AC_OUTPUT(Makefile) # creates the "config.h" now +# # and also mylib/_config.h +# +# if the argument to AX_PREFIX_CONFIG_H would have been omitted then the +# default outputfile would have been called simply "testpkg-config.h", but +# even under the name "mylib/_config.h" it contains prefix-defines like +# +# #ifndef TESTPKG_VERSION +# #define TESTPKG_VERSION "0.1.1" +# #endif +# #ifndef TESTPKG_NEED_MEMORY_H +# #define TESTPKG_NEED_MEMORY_H 1 +# #endif +# #ifndef _testpkg_const +# #define _testpkg_const _const +# #endif +# +# and this "mylib/_config.h" can be installed along with other +# header-files, which is most convenient when creating a shared library +# (that has some headers) where some functionality is dependent on the +# OS-features detected at compile-time. No need to invent some +# "mylib-confdefs.h.in" manually. :-) +# +# Note that some AC_DEFINEs that end up in the config.h file are actually +# self-referential - e.g. AC_C_INLINE, AC_C_CONST, and the AC_TYPE_OFF_T +# say that they "will define inline|const|off_t if the system does not do +# it by itself". You might want to clean up about these - consider an +# extra mylib/conf.h that reads something like: +# +# #include +# #ifndef _testpkg_const +# #define _testpkg_const const +# #endif +# +# and then start using _testpkg_const in the header files. 
That is also a +# good thing to differentiate whether some library-user has starting to +# take up with a different compiler, so perhaps it could read something +# like this: +# +# #ifdef _MSC_VER +# #include +# #else +# #include +# #endif +# #ifndef _testpkg_const +# #define _testpkg_const const +# #endif +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2008 Marten Svantesson +# Copyright (c) 2008 Gerald Point +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 11 + +AC_DEFUN([AX_PREFIX_CONFIG_H],[dnl +AC_PREREQ([2.62]) +AC_BEFORE([AC_CONFIG_HEADERS],[$0])dnl +AC_CONFIG_COMMANDS([ifelse($1,,$PACKAGE-config.h,$1)],[dnl +AS_VAR_PUSHDEF([_OUT],[ac_prefix_conf_OUT])dnl +AS_VAR_PUSHDEF([_DEF],[ac_prefix_conf_DEF])dnl +AS_VAR_PUSHDEF([_PKG],[ac_prefix_conf_PKG])dnl +AS_VAR_PUSHDEF([_LOW],[ac_prefix_conf_LOW])dnl +AS_VAR_PUSHDEF([_UPP],[ac_prefix_conf_UPP])dnl +AS_VAR_PUSHDEF([_INP],[ac_prefix_conf_INP])dnl +m4_pushdef([_script],[conftest.prefix])dnl +m4_pushdef([_symbol],[m4_cr_Letters[]m4_cr_digits[]_])dnl +_OUT=`echo ifelse($1, , $PACKAGE-config.h, $1)` +_DEF=`echo _$_OUT | sed -e "y:m4_cr_letters:m4_cr_LETTERS[]:" -e "s/@<:@^m4_cr_Letters@:>@/_/g"` +_PKG=`echo ifelse($2, , $PACKAGE, $2)` +_LOW=`echo _$_PKG | sed -e "y:m4_cr_LETTERS-:m4_cr_letters[]_:"` +_UPP=`echo $_PKG | sed -e "y:m4_cr_letters-:m4_cr_LETTERS[]_:" -e "/^@<:@m4_cr_digits@:>@/s/^/_/"` +_INP=`echo "ifelse($3,,,$3)" | sed -e 's/ *//'` +if test ".$_INP" = "."; then + for ac_file in : $CONFIG_HEADERS; do test "_$ac_file" = _: && continue + case "$ac_file" in + *.h) _INP=$ac_file ;; + *) + esac + test ".$_INP" != "." && break + done +fi +if test ".$_INP" = "."; then + case "$_OUT" in + */*) _INP=`basename "$_OUT"` + ;; + *-*) _INP=`echo "$_OUT" | sed -e "s/@<:@_symbol@:>@*-//"` + ;; + *) _INP=config.h + ;; + esac +fi +if test -z "$_PKG" ; then + AC_MSG_ERROR([no prefix for _PREFIX_PKG_CONFIG_H]) +else + if test ! 
-f "$_INP" ; then if test -f "$srcdir/$_INP" ; then + _INP="$srcdir/$_INP" + fi fi + AC_MSG_NOTICE(creating $_OUT - prefix $_UPP for $_INP defines) + if test -f $_INP ; then + AS_ECHO(["s/^@%:@undef *\\(@<:@m4_cr_LETTERS[]_@:>@\\)/@%:@undef $_UPP""_\\1/"]) > _script + AS_ECHO(["s/^@%:@undef *\\(@<:@m4_cr_letters@:>@\\)/@%:@undef $_LOW""_\\1/"]) >> _script + AS_ECHO(["s/^@%:@def[]ine *\\(@<:@m4_cr_LETTERS[]_@:>@@<:@_symbol@:>@*\\)\\(.*\\)/@%:@ifndef $_UPP""_\\1\\"]) >> _script + AS_ECHO(["@%:@def[]ine $_UPP""_\\1\\2\\"]) >> _script + AS_ECHO(["@%:@endif/"]) >> _script + AS_ECHO(["s/^@%:@def[]ine *\\(@<:@m4_cr_letters@:>@@<:@_symbol@:>@*\\)\\(.*\\)/@%:@ifndef $_LOW""_\\1\\"]) >> _script + AS_ECHO(["@%:@define $_LOW""_\\1\\2\\"]) >> _script + AS_ECHO(["@%:@endif/"]) >> _script + # now executing _script on _DEF input to create _OUT output file + echo "@%:@ifndef $_DEF" >$tmp/pconfig.h + echo "@%:@def[]ine $_DEF 1" >>$tmp/pconfig.h + echo ' ' >>$tmp/pconfig.h + echo /'*' $_OUT. Generated automatically at end of configure. '*'/ >>$tmp/pconfig.h + + sed -f _script $_INP >>$tmp/pconfig.h + echo ' ' >>$tmp/pconfig.h + echo '/* once:' $_DEF '*/' >>$tmp/pconfig.h + echo "@%:@endif" >>$tmp/pconfig.h + if cmp -s $_OUT $tmp/pconfig.h 2>/dev/null; then + AC_MSG_NOTICE([$_OUT is unchanged]) + else + ac_dir=`AS_DIRNAME(["$_OUT"])` + AS_MKDIR_P(["$ac_dir"]) + rm -f "$_OUT" + mv $tmp/pconfig.h "$_OUT" + fi + cp _script _configs.sed + else + AC_MSG_ERROR([input file $_INP does not exist - skip generating $_OUT]) + fi + rm -f conftest.* +fi +m4_popdef([_symbol])dnl +m4_popdef([_script])dnl +AS_VAR_POPDEF([_INP])dnl +AS_VAR_POPDEF([_UPP])dnl +AS_VAR_POPDEF([_LOW])dnl +AS_VAR_POPDEF([_PKG])dnl +AS_VAR_POPDEF([_DEF])dnl +AS_VAR_POPDEF([_OUT])dnl +],[PACKAGE="$PACKAGE"])]) \ No newline at end of file diff --git a/folly/small_vector.h b/folly/small_vector.h new file mode 100644 index 00000000..0a75d3ad --- /dev/null +++ b/folly/small_vector.h @@ -0,0 +1,1189 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/*
+ * For high-level documentation and usage examples see folly/doc/small_vector.md
+ *
+ * @author Jordan DeLong
+ */
+#ifndef FOLLY_SMALL_VECTOR_H_
+#define FOLLY_SMALL_VECTOR_H_
+
+#include "Portability.h"
+
+#include <stdexcept>
+#include <cstdlib>
+#include <cstring>
+#include <cstdint>
+#include <type_traits>
+#include <algorithm>
+#include <iterator>
+#include <cassert>
+#include <new>
+
+#include <boost/operators.hpp>
+#include <boost/type_traits.hpp>
+#include <boost/mpl/if.hpp>
+#include <boost/mpl/eval_if.hpp>
+#include <boost/mpl/vector.hpp>
+#include <boost/mpl/front.hpp>
+#include <boost/mpl/filter_view.hpp>
+#include <boost/mpl/identity.hpp>
+#include <boost/mpl/placeholders.hpp>
+#include <boost/mpl/empty.hpp>
+#include <boost/mpl/size.hpp>
+#include <boost/mpl/count.hpp>
+#include <boost/mpl/max.hpp>
+
+#include "folly/Malloc.h"
+
+#if defined(__GNUC__) && defined(__x86_64__)
+# include "folly/SmallLocks.h"
+# define FB_PACKED __attribute__((packed))
+#else
+# define FB_PACKED
+#endif
+
+#ifdef FOLLY_HAVE_MALLOC_SIZE
+  extern "C" std::size_t malloc_size(const void*);
+# ifndef FOLLY_HAVE_MALLOC_USABLE_SIZE
+#  define malloc_usable_size malloc_size
+# endif
+# ifndef malloc_usable_size
+#  define malloc_usable_size malloc_size
+# endif
+#endif
+
+namespace folly {
+
+//////////////////////////////////////////////////////////////////////
+
+namespace small_vector_policy {
+
+//////////////////////////////////////////////////////////////////////
+
+/*
+ * A flag which makes us refuse to use the heap at all.  If we
+ * overflow the in situ capacity we throw an exception.
+ */
+struct NoHeap;
+
+/*
+ * Passing this policy will cause small_vector to provide lock() and
+ * unlock() functions using a 1-bit spin lock in the size value.
+ *
+ * Note that this is intended for a fairly specialized (although
+ * strangely common at facebook) use case, where you have billions of
+ * vectors in memory where none of them are "hot" and most of them are
+ * small.  This allows you to get fine-grained locks without spending
+ * a lot of memory on mutexes (the alternative of a large hashtable of
+ * locks leads to extra cache misses in the lookup path).
+ *
+ * __x86_64__ only.
+ */
+struct OneBitMutex;
+
+//////////////////////////////////////////////////////////////////////
+
+} // small_vector_policy
+
+//////////////////////////////////////////////////////////////////////
+
+template<class Value,
+         std::size_t RequestedMaxInline = 1,
+         class PolicyA = void,
+         class PolicyB = void,
+         class PolicyC = void>
+struct small_vector;
+
+//////////////////////////////////////////////////////////////////////
+
+namespace detail {
+
+  /*
+   * Move a range to a range of uninitialized memory.  Assumes the
+   * ranges don't overlap.
+   */
+  template<class T>
+  typename std::enable_if<
+    !boost::has_trivial_copy<T>::value
+  >::type
+  moveToUninitialized(T* first, T* last, T* out) {
+    auto const count = last - first;
+    std::size_t idx = 0;
+    try {
+      for (; idx < std::size_t(count); ++first, ++idx) {
+        new (&out[idx]) T(std::move(*first));
+      }
+    } catch (...) {
+      // Even for callers trying to give the strong guarantee
+      // (e.g. push_back) it's ok to assume here that we don't have to
+      // move things back and that it was a copy constructor that
+      // threw: if someone throws from a move constructor the effects
+      // are unspecified.
+      for (std::size_t i = 0; i < idx; ++i) {
+        out[i].~T();
+      }
+      throw;
+    }
+  }
+
+  // Specialization for trivially copyable types.  (TODO: change to
+  // std::is_trivially_copyable when that works.)
+  template<class T>
+  typename std::enable_if<
+    boost::has_trivial_copy<T>::value
+  >::type
+  moveToUninitialized(T* first, T* last, T* out) {
+    std::memmove(out, first, (last - first) * sizeof *first);
+  }
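+
+  // Illustrative sketch of the dispatch above: for a type like
+  // std::string the first overload runs, placement-new-moving each
+  // element and unwinding on exception; for e.g. int the second
+  // overload runs and the whole range collapses into a single memmove.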
+
+  /*
+   * Move objects in memory to the right into some uninitialized
+   * memory, where the region overlaps.  This doesn't just use
+   * std::move_backward because move_backward only works if all the
+   * memory is initialized to type T already.
+   */
+  template<class T>
+  typename std::enable_if<
+    !boost::has_trivial_copy<T>::value
+  >::type
+  moveObjectsRight(T* first, T* lastConstructed, T* realLast) {
+    if (lastConstructed == realLast) {
+      return;
+    }
+
+    T* end = first - 1; // Past the end going backwards.
+    T* out = realLast - 1;
+    T* in = lastConstructed - 1;
+    try {
+      for (; in != end && out >= lastConstructed; --in, --out) {
+        new (out) T(std::move(*in));
+      }
+      for (; in != end; --in, --out) {
+        *out = std::move(*in);
+      }
+      for (; out >= lastConstructed; --out) {
+        new (out) T();
+      }
+    } catch (...) {
+      // We want to make sure the same stuff is uninitialized memory
+      // if we exit via an exception (this is to make sure we provide
+      // the basic exception safety guarantee for insert functions).
+      if (out < lastConstructed) {
+        out = lastConstructed - 1;
+      }
+      for (auto it = out + 1; it != realLast; ++it) {
+        it->~T();
+      }
+      throw;
+    }
+  }
+
+  // Specialization for trivially copyable types.  The call to
+  // std::move_backward here will just turn into a memmove.  (TODO:
+  // change to std::is_trivially_copyable when that works.)
+  template<class T>
+  typename std::enable_if<
+    boost::has_trivial_copy<T>::value
+  >::type
+  moveObjectsRight(T* first, T* lastConstructed, T* realLast) {
+    std::move_backward(first, lastConstructed, realLast);
+  }
+
+  /*
+   * Populate a region of memory using `op' to construct elements.  If
+   * anything throws, undo what we did.
+   */
+  template<class T, class Function>
+  void populateMemForward(T* mem, std::size_t n, Function const& op) {
+    std::size_t idx = 0;
+    try {
+      for (std::size_t i = 0; i < n; ++i) {
+        op(&mem[idx]);
+        ++idx;
+      }
+    } catch (...) {
+      for (std::size_t i = 0; i < idx; ++i) {
+        mem[i].~T();
+      }
+      throw;
+    }
+  }
+
+  template<class SizeType, bool ShouldUseHeap>
+  struct IntegralSizePolicy {
+    typedef SizeType InternalSizeType;
+
+    IntegralSizePolicy() : size_(0) {}
+
+  protected:
+    std::size_t policyMaxSize() const {
+      return SizeType(~kExternMask);
+    }
+
+    std::size_t doSize() const {
+      return size_ & ~kExternMask;
+    }
+
+    std::size_t isExtern() const {
+      return kExternMask & size_;
+    }
+
+    void setExtern(bool b) {
+      if (b) {
+        size_ |= kExternMask;
+      } else {
+        size_ &= ~kExternMask;
+      }
+    }
+
+    void setSize(std::size_t sz) {
+      assert(sz <= policyMaxSize());
+      size_ = (kExternMask & size_) | SizeType(sz);
+    }
+
+    void swapSizePolicy(IntegralSizePolicy& o) {
+      std::swap(size_, o.size_);
+    }
+
+  protected:
+    static bool const kShouldUseHeap = ShouldUseHeap;
+
+  private:
+    static SizeType const kExternMask =
+      kShouldUseHeap ? SizeType(1) << (sizeof(SizeType) * 8 - 1)
+                     : 0;
+
+    SizeType size_;
+  };
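+
+  // Worked example (illustrative): with SizeType = uint32_t and heap use
+  // allowed, kExternMask is 1u << 31 == 0x80000000, the low 31 bits hold
+  // the size, and policyMaxSize() is 0x7fffffff.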
+
+#ifdef __x86_64__
+  template<class SizeType, bool ShouldUseHeap>
+  struct OneBitMutexImpl {
+    typedef SizeType InternalSizeType;
+
+    OneBitMutexImpl() { psl_.init(); }
+
+    void lock()     const { psl_.lock(); }
+    void unlock()   const { psl_.unlock(); }
+    bool try_lock() const { return psl_.try_lock(); }
+
+  protected:
+    static bool const kShouldUseHeap = ShouldUseHeap;
+
+    std::size_t policyMaxSize() const {
+      return SizeType(~(SizeType(1) << kLockBit | kExternMask));
+    }
+
+    std::size_t doSize() const {
+      return psl_.getData() & ~kExternMask;
+    }
+
+    std::size_t isExtern() const {
+      return psl_.getData() & kExternMask;
+    }
+
+    void setExtern(bool b) {
+      if (b) {
+        setSize(SizeType(doSize()) | kExternMask);
+      } else {
+        setSize(SizeType(doSize()) & ~kExternMask);
+      }
+    }
+
+    void setSize(std::size_t sz) {
+      assert(sz < (std::size_t(1) << kLockBit));
+      psl_.setData((kExternMask & psl_.getData()) | SizeType(sz));
+    }
+
+    void swapSizePolicy(OneBitMutexImpl& o) {
+      std::swap(psl_, o.psl_);
+    }
+
+  private:
+    static SizeType const kLockBit = sizeof(SizeType) * 8 - 1;
+    static SizeType const kExternMask =
+      kShouldUseHeap ? SizeType(1) << (sizeof(SizeType) * 8 - 2)
+                     : 0;
+
+    PicoSpinLock<SizeType> psl_;
+  };
+#else
+  template<class SizeType, bool ShouldUseHeap>
+  struct OneBitMutexImpl {
+    static_assert(std::is_same<SizeType, void>::value,
+                  "OneBitMutex only works on x86-64");
+  };
+#endif
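+
+  // Illustrative use of the policy (x86-64 only): the lock bit shares a
+  // word with the size, so a lockable vector costs no extra space:
+  //
+  //   folly::small_vector<int, 4,
+  //     folly::small_vector_policy::OneBitMutex> v;
+  //   v.lock();
+  //   v.push_back(12);
+  //   v.unlock();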
+
+  /*
+   * If you're just trying to use this class, ignore everything about
+   * this next small_vector_base class thing.
+   *
+   * The purpose of this junk is to minimize sizeof(small_vector<>)
+   * and allow specifying the template parameters in whatever order is
+   * convenient for the user.  There's a few extra steps here to try
+   * to keep the error messages at least semi-reasonable.
+   *
+   * Apologies for all the black magic.
+   */
+  namespace mpl = boost::mpl;
+  template<class Value,
+           std::size_t RequestedMaxInline,
+           class PolicyA, class PolicyB, class PolicyC>
+  struct small_vector_base {
+    typedef mpl::vector<PolicyA,PolicyB,PolicyC> PolicyList;
+
+    /*
+     * Determine the size type
+     */
+    typedef typename mpl::filter_view<
+      PolicyList,
+      boost::is_integral<mpl::placeholders::_1>
+    >::type Integrals;
+    typedef typename mpl::eval_if<
+      mpl::empty<Integrals>,
+      mpl::identity<std::size_t>,
+      mpl::front<Integrals>
+    >::type SizeType;
+
+    static_assert(std::is_unsigned<SizeType>::value,
+                  "Size type should be an unsigned integral type");
+    static_assert(mpl::size<Integrals>::value == 0 ||
+                    mpl::size<Integrals>::value == 1,
+                  "Multiple size types specified in small_vector<>");
+
+    /*
+     * Figure out if we're supposed to supply a one-bit mutex. :)
+     */
+    typedef typename mpl::count<
+      PolicyList,small_vector_policy::OneBitMutex
+    >::type HasMutex;
+
+    static_assert(HasMutex::value == 0 || HasMutex::value == 1,
+                  "Multiple copies of small_vector_policy::OneBitMutex "
+                  "supplied; this is probably a mistake");
+
+    /*
+     * Determine whether we should allow spilling to the heap or not.
+     */
+    typedef typename mpl::count<
+      PolicyList,small_vector_policy::NoHeap
+    >::type HasNoHeap;
+
+    static_assert(HasNoHeap::value == 0 || HasNoHeap::value == 1,
+                  "Multiple copies of small_vector_policy::NoHeap "
+                  "supplied; this is probably a mistake");
+
+    /*
+     * Make the real policy base classes.
+     */
+    typedef typename mpl::if_<
+      HasMutex,
+      OneBitMutexImpl<SizeType,!HasNoHeap::value>,
+      IntegralSizePolicy<SizeType,!HasNoHeap::value>
+    >::type ActualSizePolicy;
+
+    /*
+     * Now inherit from them all.  This is done in such a convoluted
+     * way to make sure we get the empty base optimization on all these
+     * types to keep sizeof(small_vector<>) minimal.
+     */
+    typedef boost::totally_ordered1<
+      small_vector<Value,RequestedMaxInline,PolicyA,PolicyB,PolicyC>,
+      ActualSizePolicy
+    > type;
+  };
+
+  template<class T>
+  T* pointerFlagSet(T* p) {
+    return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(p) | 1);
+  }
+  template<class T>
+  bool pointerFlagGet(T* p) {
+    return reinterpret_cast<uintptr_t>(p) & 1;
+  }
+  template<class T>
+  T* pointerFlagClear(T* p) {
+    return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(p) & ~1);
+  }
+  inline void* shiftPointer(void* p, size_t sizeBytes) {
+    return static_cast<char*>(p) + sizeBytes;
+  }
+}
+
+//////////////////////////////////////////////////////////////////////
+
+template<class Value,
+         std::size_t RequestedMaxInline,
+         class PolicyA,
+         class PolicyB,
+         class PolicyC>
+class small_vector
+  : public detail::small_vector_base<
+      Value,RequestedMaxInline,PolicyA,PolicyB,PolicyC
+    >::type
+{
+  typedef typename detail::small_vector_base<
+    Value,RequestedMaxInline,PolicyA,PolicyB,PolicyC
+  >::type BaseType;
+  typedef typename BaseType::InternalSizeType InternalSizeType;
+
+  /*
+   * Figure out the max number of elements we should inline.  (If
+   * the user asks for less inlined elements than we can fit unioned
+   * into our value_type*, we will inline more than they asked.)
+   */
+  enum {
+    MaxInline = boost::mpl::max<
+      boost::mpl::int_<RequestedMaxInline>,
+      boost::mpl::int_<sizeof(Value*) / sizeof(Value)>
+    >::type::value
+  };
+
+public:
+  typedef std::size_t        size_type;
+  typedef Value              value_type;
+  typedef value_type&        reference;
+  typedef value_type const&  const_reference;
+  typedef value_type*        iterator;
+  typedef value_type const*  const_iterator;
+  typedef std::ptrdiff_t     difference_type;
+
+  typedef std::reverse_iterator<iterator>       reverse_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+  explicit small_vector() {}
+
+  small_vector(small_vector const& o) {
+    assign(o.begin(), o.end());
+  }
+
+  small_vector(small_vector&& o) {
+    *this = std::move(o);
+  }
+
+  small_vector(std::initializer_list<value_type> il) {
+    constructImpl(il.begin(), il.end(), std::false_type());
+  }
+
+  explicit small_vector(size_type n, value_type const& t = value_type()) {
+    doConstruct(n, t);
+  }
+
+  template<class Arg>
+  explicit small_vector(Arg arg1, Arg arg2) {
+    // Forward using std::is_arithmetic to get to the proper
+    // implementation; this disambiguates between the iterators and
+    // (size_t, value_type) meaning for this constructor.
+    constructImpl(arg1, arg2, std::is_arithmetic<Arg>());
+  }
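+
+  // Illustrative examples of the disambiguation above (values
+  // hypothetical): small_vector<int,2> a(3, 42) takes the arithmetic
+  // branch and builds {42, 42, 42}, while small_vector<int,2>
+  // b(a.begin(), a.end()) takes the iterator branch and copies the
+  // range.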
MaxInline + : this->policyMaxSize(); + } + + size_type size() const { return this->doSize(); } + bool empty() const { return !size(); } + + iterator begin() { return data(); } + iterator end() { return data() + size(); } + const_iterator begin() const { return data(); } + const_iterator end() const { return data() + size(); } + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + const_reverse_iterator crbegin() const { return rbegin(); } + const_reverse_iterator crend() const { return rend(); } + + /* + * Usually one of the simplest functions in a Container-like class + * but a bit more complex here. We have to handle all combinations + * of in-place vs. heap between this and o. + * + * Basic guarantee only. Provides the nothrow guarantee iff our + * value_type has a nothrow move or copy constructor. + */ + void swap(small_vector& o) { + using std::swap; // Allow ADL on swap for our value_type. + + if (this->isExtern() && o.isExtern()) { + this->swapSizePolicy(o); + + auto thisCapacity = this->capacity(); + auto oCapacity = o.capacity(); + + std::swap(unpackHack(&u.pdata_.heap_), unpackHack(&o.u.pdata_.heap_)); + + this->setCapacity(oCapacity); + o.setCapacity(thisCapacity); + + return; + } + + if (!this->isExtern() && !o.isExtern()) { + auto& oldSmall = size() < o.size() ? *this : o; + auto& oldLarge = size() < o.size() ? o : *this; + + for (size_type i = 0; i < oldSmall.size(); ++i) { + swap(oldSmall[i], oldLarge[i]); + } + + size_type i = oldSmall.size(); + try { + for (; i < oldLarge.size(); ++i) { + new (&oldSmall[i]) value_type(std::move(oldLarge[i])); + oldLarge[i].~value_type(); + } + } catch (...) { + for (; i < oldLarge.size(); ++i) { + oldLarge[i].~value_type(); + } + oldLarge.setSize(oldSmall.size()); + throw; + } + this->swapSizePolicy(o); + return; + } + + // isExtern != o.isExtern() + auto& oldExtern = o.isExtern() ? o : *this; + auto& oldIntern = o.isExtern() ? *this : o; + + auto oldExternCapacity = oldExtern.capacity(); + auto oldExternHeap = oldExtern.u.pdata_.heap_; + + auto buff = oldExtern.u.buffer(); + size_type i = 0; + try { + for (; i < oldIntern.size(); ++i) { + new (&buff[i]) value_type(std::move(oldIntern[i])); + oldIntern[i].~value_type(); + } + } catch (...) { + for (size_type kill = 0; kill < i; ++kill) { + buff[kill].~value_type(); + } + for (; i < oldIntern.size(); ++i) { + oldIntern[i].~value_type(); + } + oldIntern.setSize(0); + oldExtern.u.pdata_.heap_ = oldExternHeap; + oldExtern.setCapacity(oldExternCapacity); + throw; + } + oldIntern.u.pdata_.heap_ = oldExternHeap; + this->swapSizePolicy(o); + oldIntern.setCapacity(oldExternCapacity); + } + + void resize(size_type sz) { + if (sz < size()) { + erase(begin() + sz, end()); + return; + } + makeSize(sz); + detail::populateMemForward(begin() + size(), sz - size(), + [&] (void* p) { new (p) value_type(); } + ); + this->setSize(sz); + } + + void resize(size_type sz, value_type const& v) { + if (sz < size()) { + erase(begin() + sz, end()); + return; + } + makeSize(sz); + detail::populateMemForward(begin() + size(), sz - size(), + [&] (void* p) { new (p) value_type(v); } + ); + this->setSize(sz); + } + + value_type* data() noexcept { + return this->isExtern() ? 
u.heap() : u.buffer(); + } + + value_type const* data() const noexcept { + return this->isExtern() ? u.heap() : u.buffer(); + } + + template + iterator emplace(const_iterator p, Args&&... args) { + if (p == cend()) { + emplace_back(std::forward(args)...); + return end() - 1; + } + + /* + * We implement emplace at places other than at the back with a + * temporary for exception safety reasons. It is possible to + * avoid having to do this, but it becomes hard to maintain the + * basic exception safety guarantee (unless you respond to a copy + * constructor throwing by clearing the whole vector). + * + * The reason for this is that otherwise you have to destruct an + * element before constructing this one in its place---if the + * constructor throws, you either need a nothrow default + * constructor or a nothrow copy/move to get something back in the + * "gap", and the vector requirements don't guarantee we have any + * of these. Clearing the whole vector is a legal response in + * this situation, but it seems like this implementation is easy + * enough and probably better. + */ + return insert(p, value_type(std::forward(args)...)); + } + + void reserve(size_type sz) { + makeSize(sz); + } + + size_type capacity() const { + if (this->isExtern()) { + if (u.hasCapacity()) { + return *u.getCapacity(); + } + return malloc_usable_size(u.pdata_.heap_) / sizeof(value_type); + } + return MaxInline; + } + + void shrink_to_fit() { + if (!this->isExtern()) { + return; + } + + small_vector tmp(begin(), end()); + tmp.swap(*this); + } + + template + void emplace_back(Args&&... args) { + // call helper function for static dispatch of special cases + emplaceBack(std::forward(args)...); + } + + void push_back(value_type&& t) { + if (capacity() == size()) { + makeSize(std::max(size_type(2), 3 * size() / 2), &t, size()); + } else { + new (end()) value_type(std::move(t)); + } + this->setSize(size() + 1); + } + + void push_back(value_type const& t) { + // Make a copy and forward to the rvalue value_type&& overload + // above. + push_back(value_type(t)); + } + + void pop_back() { + erase(end() - 1); + } + + iterator insert(const_iterator constp, value_type&& t) { + iterator p = unconst(constp); + + if (p == end()) { + push_back(std::move(t)); + return end() - 1; + } + + auto offset = p - begin(); + + if (capacity() == size()) { + makeSize(size() + 1, &t, offset); + this->setSize(this->size() + 1); + } else { + makeSize(size() + 1); + detail::moveObjectsRight(data() + offset, + data() + size(), + data() + size() + 1); + this->setSize(size() + 1); + data()[offset] = std::move(t); + } + return begin() + offset; + + } + + iterator insert(const_iterator p, value_type const& t) { + // Make a copy and forward to the rvalue value_type&& overload + // above. + return insert(p, value_type(t)); + } + + iterator insert(const_iterator pos, size_type n, value_type const& val) { + auto offset = pos - begin(); + makeSize(size() + n); + detail::moveObjectsRight(data() + offset, + data() + size(), + data() + size() + n); + this->setSize(size() + n); + std::generate_n(begin() + offset, n, [&] { return val; }); + return begin() + offset; + } + + template + iterator insert(const_iterator p, Arg arg1, Arg arg2) { + // Forward using std::is_arithmetic to get to the proper + // implementation; this disambiguates between the iterators and + // (size_t, value_type) meaning for this function. 
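+    //
+    // For example (hypothetical calls, not from this file):
+    //
+    //   v.insert(p, 3, x);               // Arg is arithmetic: three
+    //                                    // copies of x
+    //   v.insert(p, u.begin(), u.end()); // Arg is an iterator: copy the
+    //                                    // range [u.begin(), u.end())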
+    return insertImpl(unconst(p), arg1, arg2, std::is_arithmetic<Arg>());
+  }
+
+  iterator insert(const_iterator p, std::initializer_list<value_type> il) {
+    return insert(p, il.begin(), il.end());
+  }
+
+  iterator erase(const_iterator q) {
+    std::move(unconst(q) + 1, end(), unconst(q));
+    (data() + size() - 1)->~value_type();
+    this->setSize(size() - 1);
+    return unconst(q);
+  }
+
+  iterator erase(const_iterator q1, const_iterator q2) {
+    std::move(unconst(q2), end(), unconst(q1));
+    // Destroy only the moved-from tail; the elements before it now hold
+    // the moved-down values and must stay alive.
+    for (auto it = end() - (q2 - q1); it != end(); ++it) {
+      it->~value_type();
+    }
+    this->setSize(size() - (q2 - q1));
+    return unconst(q1);
+  }
+
+  void clear() {
+    erase(begin(), end());
+  }
+
+  template<class Arg>
+  void assign(Arg first, Arg last) {
+    clear();
+    insert(end(), first, last);
+  }
+
+  void assign(std::initializer_list<value_type> il) {
+    assign(il.begin(), il.end());
+  }
+
+  void assign(size_type n, const value_type& t) {
+    clear();
+    insert(end(), n, t);
+  }
+
+  reference front()             { assert(!empty()); return *begin(); }
+  reference back()              { assert(!empty()); return *(end() - 1); }
+  const_reference front() const { assert(!empty()); return *begin(); }
+  const_reference back() const  { assert(!empty()); return *(end() - 1); }
+
+  reference operator[](size_type i) {
+    assert(i < size());
+    return *(begin() + i);
+  }
+
+  const_reference operator[](size_type i) const {
+    assert(i < size());
+    return *(begin() + i);
+  }
+
+  reference at(size_type i) {
+    if (i >= size()) {
+      throw std::out_of_range("index out of range");
+    }
+    return (*this)[i];
+  }
+
+  const_reference at(size_type i) const {
+    if (i >= size()) {
+      throw std::out_of_range("index out of range");
+    }
+    return (*this)[i];
+  }
+
+private:
+
+  /*
+   * This does the same thing as emplace_back, but we need this helper
+   * to catch the special case -- see the next overload function.
+   */
+  template<class ...Args>
+  void emplaceBack(Args&&... args) {
+    makeSize(size() + 1);
+    new (end()) value_type(std::forward<Args>(args)...);
+    this->setSize(size() + 1);
+  }
+
+  /*
+   * Special case of emplaceBack for rvalue
+   */
+  void emplaceBack(value_type&& t) {
+    push_back(std::move(t));
+  }
+
+  static iterator unconst(const_iterator it) {
+    return const_cast<iterator>(it);
+  }
+
+  /*
+   * g++ doesn't allow you to bind a non-const reference to a member
+   * of a packed structure, presumably because it would make it too
+   * easy to accidentally make an unaligned memory access?
+   */
+  template<class T> static T& unpackHack(T* p) {
+    return *p;
+  }
+
+  // The std::false_type argument is part of disambiguating the
+  // iterator insert functions from integral types (see insert().)
+  template<class It>
+  iterator insertImpl(iterator pos, It first, It last, std::false_type) {
+    typedef typename std::iterator_traits<It>::iterator_category categ;
+    if (std::is_same<categ,std::input_iterator_tag>::value) {
+      auto offset = pos - begin();
+      while (first != last) {
+        pos = insert(pos, *first++);
+        ++pos;
+      }
+      return begin() + offset;
+    }
+
+    auto distance = std::distance(first, last);
+    auto offset = pos - begin();
+    makeSize(size() + distance);
+    detail::moveObjectsRight(data() + offset,
+                             data() + size(),
+                             data() + size() + distance);
+    this->setSize(size() + distance);
+    std::copy_n(first, distance, begin() + offset);
+    return begin() + offset;
+  }
+
+  iterator insertImpl(iterator pos, size_type n, const value_type& val,
+                      std::true_type) {
+    // The true_type means this should call the size_t,value_type
+    // overload.  (See insert().)
+    return insert(pos, n, val);
+  }
+
+  // The std::false_type argument came from std::is_arithmetic as part
+  // of disambiguating an overload (see the comment in the
+  // constructor).
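+  //
+  // For instance (hypothetical): small_vector<size_t,4> v(8, 8) must mean
+  // "eight copies of the value 8" rather than an iterator range, even
+  // though both arguments also match the templated constructor.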
+ template + void constructImpl(It first, It last, std::false_type) { + typedef typename std::iterator_traits::iterator_category categ; + if (std::is_same::value) { + // With iterators that only allow a single pass, we can't really + // do anything sane here. + while (first != last) { + push_back(*first++); + } + return; + } + + auto distance = std::distance(first, last); + makeSize(distance); + this->setSize(distance); + + detail::populateMemForward(data(), distance, + [&] (void* p) { new (p) value_type(*first++); } + ); + } + + void doConstruct(size_type n, value_type const& val) { + makeSize(n); + this->setSize(n); + detail::populateMemForward(data(), n, + [&] (void* p) { new (p) value_type(val); } + ); + } + + // The true_type means we should forward to the size_t,value_type + // overload. + void constructImpl(size_type n, value_type const& val, std::true_type) { + doConstruct(n, val); + } + + void makeSize(size_type size, value_type* v = NULL) { + makeSize(size, v, size - 1); + } + + /* + * Ensure we have a large enough memory region to be size `size'. + * Will move/copy elements if we are spilling to heap_ or needed to + * allocate a new region, but if resized in place doesn't initialize + * anything in the new region. In any case doesn't change size(). + * Supports insertion of new element during reallocation by given + * pointer to new element and position of new element. + * NOTE: If reallocation is not needed, and new element should be + * inserted in the middle of vector (not at the end), do the move + * objects and insertion outside the function, otherwise exception is thrown. + */ + void makeSize(size_type size, value_type* v, size_type pos) { + if (size > this->max_size()) { + throw std::length_error("max_size exceeded in small_vector"); + } + if (size <= this->capacity()) { + return; + } + + auto needBytes = size * sizeof(value_type); + // If the capacity isn't explicitly stored inline, but the heap + // allocation is grown to over some threshold, we should store + // a capacity at the front of the heap allocation. + bool heapifyCapacity = + !kHasInlineCapacity && needBytes > kHeapifyCapacityThreshold; + if (heapifyCapacity) { + needBytes += kHeapifyCapacitySize; + } + auto const sizeBytes = goodMallocSize(needBytes); + void* newh = std::malloc(sizeBytes); + if (!newh) { + throw std::bad_alloc(); + } + // We expect newh to be at least 2-aligned, because we want to + // use its least significant bit as a flag. + assert(!detail::pointerFlagGet(newh)); + + value_type* newp = static_cast( + heapifyCapacity ? + detail::shiftPointer(newh, kHeapifyCapacitySize) : + newh); + + if (v != NULL) { + // move new element + try { + new (&newp[pos]) value_type(std::move(*v)); + } catch (...) { + std::free(newh); + throw; + } + + // move old elements to the left of the new one + try { + detail::moveToUninitialized(begin(), begin() + pos, newp); + } catch (...) { + newp[pos].~value_type(); + std::free(newh); + throw; + } + + // move old elements to the right of the new one + try { + if (pos < size-1) { + detail::moveToUninitialized(begin() + pos, end(), newp + pos + 1); + } + } catch (...) { + for (size_type i = 0; i <= pos; ++i) { + newp[i].~value_type(); + } + std::free(newh); + throw; + } + } else { + // move without inserting new element + try { + detail::moveToUninitialized(begin(), end(), newp); + } catch (...) 
{
+        std::free(newh);
+        throw;
+      }
+    }
+
+    for (auto& val : *this) {
+      val.~value_type();
+    }
+
+    if (this->isExtern()) {
+      u.freeHeap();
+    }
+    auto availableSizeBytes = sizeBytes;
+    if (heapifyCapacity) {
+      u.pdata_.heap_ = detail::pointerFlagSet(newh);
+      availableSizeBytes -= kHeapifyCapacitySize;
+    } else {
+      u.pdata_.heap_ = newh;
+    }
+    this->setExtern(true);
+    this->setCapacity(availableSizeBytes / sizeof(value_type));
+  }
+
+  /*
+   * This will set the capacity field, stored inline in the storage_ field
+   * if there is sufficient room to store it.
+   */
+  void setCapacity(size_type newCapacity) {
+    assert(this->isExtern());
+    if (u.hasCapacity()) {
+      assert(newCapacity < std::numeric_limits<InternalSizeType>::max());
+      *u.getCapacity() = InternalSizeType(newCapacity);
+    }
+  }
+
+private:
+  struct HeapPtrWithCapacity {
+    void* heap_;
+    InternalSizeType capacity_;
+
+    InternalSizeType* getCapacity() {
+      return &capacity_;
+    }
+  } FB_PACKED;
+
+  struct HeapPtr {
+    // Lower order bit of heap_ is used as flag to indicate whether capacity is
+    // stored at the front of the heap allocation.
+    void* heap_;
+
+    InternalSizeType* getCapacity() {
+      assert(detail::pointerFlagGet(heap_));
+      return static_cast<InternalSizeType*>(
+        detail::pointerFlagClear(heap_));
+    }
+  } FB_PACKED;
+
+#if defined(__x86_64__)
+  typedef unsigned char InlineStorageType[sizeof(value_type) * MaxInline];
+#else
+  typedef typename std::aligned_storage<
+    sizeof(value_type) * MaxInline,
+    alignof(value_type)
+  >::type InlineStorageType;
+#endif
+
+  static bool const kHasInlineCapacity =
+    sizeof(HeapPtrWithCapacity) < sizeof(InlineStorageType);
+
+  // This value should be a multiple of the word size.
+  static size_t const kHeapifyCapacitySize = sizeof(
+    typename std::aligned_storage<
+      sizeof(InternalSizeType),
+      alignof(value_type)
+    >::type);
+  // Threshold to control capacity heapifying.
+  static size_t const kHeapifyCapacityThreshold =
+    100 * kHeapifyCapacitySize;
+
+  typedef typename std::conditional<
+    kHasInlineCapacity,
+    HeapPtrWithCapacity,
+    HeapPtr
+  >::type PointerType;
+
+  union Data {
+    explicit Data() { pdata_.heap_ = 0; }
+
+    PointerType pdata_;
+    InlineStorageType storage_;
+
+    value_type* buffer() noexcept {
+      void* vp = &storage_;
+      return static_cast<value_type*>(vp);
+    }
+    value_type const* buffer() const noexcept {
+      return const_cast<Data*>(this)->buffer();
+    }
+    value_type* heap() noexcept {
+      if (kHasInlineCapacity || !detail::pointerFlagGet(pdata_.heap_)) {
+        return static_cast<value_type*>(pdata_.heap_);
+      }
+      return static_cast<value_type*>(
+        detail::shiftPointer(
+          detail::pointerFlagClear(pdata_.heap_), kHeapifyCapacitySize));
+    }
+    value_type const* heap() const noexcept {
+      return const_cast<Data*>(this)->heap();
+    }
+
+    bool hasCapacity() const {
+      return kHasInlineCapacity || detail::pointerFlagGet(pdata_.heap_);
+    }
+    InternalSizeType* getCapacity() {
+      return pdata_.getCapacity();
+    }
+    InternalSizeType* getCapacity() const {
+      return const_cast<Data*>(this)->getCapacity();
+    }
+
+    void freeHeap() {
+      auto vp = detail::pointerFlagClear(pdata_.heap_);
+      std::free(vp);
+    }
+  } FB_PACKED u;
+} FB_PACKED;
+
+//////////////////////////////////////////////////////////////////////
+
+// Basic guarantee only, or provides the nothrow guarantee iff T has a
+// nothrow move or copy constructor.
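+//
+// A usage sketch (hypothetical variables, not part of this file): this
+// free function is what unqualified, ADL-found swap resolves to, so
+// callers get the member swap instead of the generic temporary-based
+// std::swap:
+//
+//   folly::small_vector<int,4> a, b;
+//   using std::swap;
+//   swap(a, b);   // finds folly::swap, which forwards to a.swap(b)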
+template +void swap(small_vector& a, + small_vector& b) { + a.swap(b); +} + +////////////////////////////////////////////////////////////////////// + +} + +#ifdef FB_PACKED +# undef FB_PACKED +#endif + +#endif diff --git a/folly/sorted_vector_types.h b/folly/sorted_vector_types.h new file mode 100644 index 00000000..dd091b76 --- /dev/null +++ b/folly/sorted_vector_types.h @@ -0,0 +1,606 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This header defines two classes that very nearly model + * AssociativeContainer (but not quite). These implement set-like and + * map-like behavior on top of a sorted vector, instead of using + * rb-trees like std::set and std::map. + * + * This is potentially useful in cases where the number of elements in + * the set or map is small, or when you want to avoid using more + * memory than necessary and insertions/deletions are much more rare + * than lookups (these classes have O(N) insertions/deletions). + * + * In the interest of using these in conditions where the goal is to + * minimize memory usage, they support a GrowthPolicy parameter, which + * is a class defining a single function called increase_capacity, + * which will be called whenever we are about to insert something: you + * can then decide to call reserve() based on the current capacity() + * and size() of the passed in vector-esque Container type. An + * example growth policy that grows one element at a time: + * + * struct OneAtATimePolicy { + * template + * void increase_capacity(Container& c) { + * if (c.size() == c.capacity()) { + * c.reserve(c.size() + 1); + * } + * } + * }; + * + * typedef sorted_vector_set, + * std::allocator, + * OneAtATimePolicy> + * OneAtATimeIntSet; + * + * Important differences from std::set and std::map: + * - insert() and erase() invalidate iterators and references + * - insert() and erase() are O(N) + * - our iterators model RandomAccessIterator + * - sorted_vector_map::value_type is pair, not pair. + * (This is basically because we want to store the value_type in + * std::vector<>, which requires it to be Assignable.) + */ + +#ifndef FOLLY_SORTED_VECTOR_TYPES_H_ +#define FOLLY_SORTED_VECTOR_TYPES_H_ + +#include +#include +#include +#include +#include +#include +#include + +namespace folly { + +////////////////////////////////////////////////////////////////////// + +namespace detail { + + // This wrapper goes around a GrowthPolicy and provides iterator + // preservation semantics, but only if the growth policy is not the + // default (i.e. nothing). 
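+  //
+  // Concretely, this guards against a subtle failure mode: if the
+  // policy's increase_capacity() calls reserve() and the underlying
+  // vector reallocates, a previously computed insertion iterator would
+  // dangle.  The wrapper below saves the iterator's offset from begin()
+  // and rebuilds the iterator after the policy has run.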
+ template + struct growth_policy_wrapper : private Policy { + template + Iterator increase_capacity(Container& c, Iterator desired_insertion) + { + typedef typename Container::difference_type diff_t; + diff_t d = desired_insertion - c.begin(); + Policy::increase_capacity(c); + return c.begin() + d; + } + }; + template<> + struct growth_policy_wrapper { + template + Iterator increase_capacity(Container&, Iterator it) { + return it; + } + }; + + /* + * This helper returns the distance between two iterators if it is + * possible to figure it out without messing up the range + * (i.e. unless they are InputIterators). Otherwise this returns + * -1. + */ + template + int distance_if_multipass(Iterator first, Iterator last) { + typedef typename std::iterator_traits::iterator_category categ; + if (boost::is_same::value) + return -1; + return std::distance(first, last); + } + + template + std::pair + insert_with_hint(OurContainer& sorted, + Vector& cont, + typename OurContainer::iterator hint, + typename OurContainer::value_type value, + GrowthPolicy& po) + { + const typename OurContainer::value_compare& cmp(sorted.value_comp()); + if (hint == cont.end() || cmp(value, *hint)) { + if (hint == cont.begin()) { + po.increase_capacity(cont, cont.begin()); + return std::make_pair(cont.insert(cont.begin(), value), true); + } + if (cmp(*(hint - 1), value)) { + hint = po.increase_capacity(cont, hint); + return std::make_pair(cont.insert(hint, value), true); + } + return sorted.insert(value); + } + + if (cmp(*hint, value)) { + if (hint + 1 == cont.end() || cmp(value, *(hint + 1))) { + typename OurContainer::iterator it = + po.increase_capacity(cont, hint + 1); + return std::make_pair(cont.insert(it, value), true); + } + } + + // Value and *hint did not compare, so they are equal keys. + return std::make_pair(hint, false); + } + +} + +////////////////////////////////////////////////////////////////////// + +/** + * A sorted_vector_set is a container similar to std::set<>, but + * implemented as as a sorted array with std::vector<>. + * + * @param class T Data type to store + * @param class Compare Comparison function that imposes a + * strict weak ordering over instances of T + * @param class Allocator allocation policy + * @param class GrowthPolicy policy object to control growth + * + * @author Aditya Agarwal + * @author Akhil Wable + * @author Jordan DeLong + */ +template, + class Allocator = std::allocator, + class GrowthPolicy = void> +class sorted_vector_set + : boost::totally_ordered1< + sorted_vector_set + , detail::growth_policy_wrapper > +{ + typedef std::vector ContainerT; + + detail::growth_policy_wrapper& + get_growth_policy() { return *this; } + +public: + typedef T value_type; + typedef T key_type; + typedef Compare key_compare; + typedef Compare value_compare; + + typedef typename ContainerT::pointer pointer; + typedef typename ContainerT::reference reference; + typedef typename ContainerT::const_reference const_reference; + /* + * XXX: Our normal iterator ought to also be a constant iterator + * (cf. Defect Report 103 for std::set), but this is a bit more of a + * pain. 
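+   *
+   * (In practice this means the type system will not stop a caller from
+   * mutating an element through our iterator in a way that breaks the
+   * sort order; callers must preserve that invariant themselves.)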
+ */ + typedef typename ContainerT::iterator iterator; + typedef typename ContainerT::const_iterator const_iterator; + typedef typename ContainerT::difference_type difference_type; + typedef typename ContainerT::size_type size_type; + typedef typename ContainerT::reverse_iterator reverse_iterator; + typedef typename ContainerT::const_reverse_iterator const_reverse_iterator; + + explicit sorted_vector_set(const Compare& comp = Compare(), + const Allocator& alloc = Allocator()) + : m_(comp, alloc) + {} + + template + explicit sorted_vector_set( + InputIterator first, + InputIterator last, + const Compare& comp = Compare(), + const Allocator& alloc = Allocator()) + : m_(comp, alloc) + { + // This is linear if [first, last) is already sorted (and if we + // can figure out the distance between the two iterators). + insert(first, last); + } + + key_compare key_comp() const { return m_; } + value_compare value_comp() const { return m_; } + + iterator begin() { return m_.cont_.begin(); } + iterator end() { return m_.cont_.end(); } + const_iterator begin() const { return m_.cont_.begin(); } + const_iterator end() const { return m_.cont_.end(); } + reverse_iterator rbegin() { return m_.cont_.rbegin(); } + reverse_iterator rend() { return m_.cont_.rend(); } + const_reverse_iterator rbegin() const { return m_.cont_.rbegin(); } + const_reverse_iterator rend() const { return m_.cont_.rend(); } + + void clear() { return m_.cont_.clear(); } + size_type size() const { return m_.cont_.size(); } + size_type max_size() const { return m_.cont_.max_size(); } + bool empty() const { return m_.cont_.empty(); } + void reserve(size_type s) { return m_.cont_.reserve(s); } + size_type capacity() const { return m_.cont_.capacity(); } + + std::pair insert(const value_type& value) { + iterator it = lower_bound(value); + if (it == end() || value_comp()(value, *it)) { + it = get_growth_policy().increase_capacity(m_.cont_, it); + return std::make_pair(m_.cont_.insert(it, value), true); + } + return std::make_pair(it, false); + } + + std::pair insert(iterator hint, const value_type& value) { + return detail::insert_with_hint(*this, m_.cont_, hint, value, + get_growth_policy()); + } + + template + void insert(InputIterator first, InputIterator last) { + int d = detail::distance_if_multipass(first, last); + if (d != -1) { + m_.cont_.reserve(m_.cont_.size() + d); + } + for (; first != last; ++first) { + insert(end(), *first); + } + } + + size_type erase(const key_type& key) { + iterator it = lower_bound(key); + if (it == end()) { + return 0; + } + m_.cont_.erase(it); + return 1; + } + + void erase(iterator it) { + m_.cont_.erase(it); + } + + void erase(iterator first, iterator last) { + m_.cont_.erase(first, last); + } + + iterator find(const key_type& key) { + iterator it = lower_bound(key); + if (it == end() || !key_comp()(key, *it)) + return it; + return end(); + } + + const_iterator find(const key_type& key) const { + const_iterator it = lower_bound(key); + if (it == end() || !key_comp()(key, *it)) + return it; + return end(); + } + + size_type count(const key_type& key) const { + return find(key) == end() ? 
0 : 1; + } + + iterator lower_bound(const key_type& key) { + return std::lower_bound(begin(), end(), key, key_comp()); + } + + const_iterator lower_bound(const key_type& key) const { + return std::lower_bound(begin(), end(), key, key_comp()); + } + + iterator upper_bound(const key_type& key) { + return std::upper_bound(begin(), end(), key, key_comp()); + } + + const_iterator upper_bound(const key_type& key) const { + return std::upper_bound(begin(), end(), key, key_comp()); + } + + std::pair equal_range(const key_type& key) { + return std::equal_range(begin(), end(), key, key_comp()); + } + + std::pair + equal_range(const key_type& key) const { + return std::equal_range(begin(), end(), key, key_comp()); + } + + // Nothrow as long as swap() on the Compare type is nothrow. + void swap(sorted_vector_set& o) { + using std::swap; // Allow ADL for swap(); fall back to std::swap(). + Compare& a = m_; + Compare& b = o.m_; + swap(a, b); + m_.cont_.swap(o.m_.cont_); + } + + bool operator==(const sorted_vector_set& other) const { + return other.m_.cont_ == m_.cont_; + } + + bool operator<(const sorted_vector_set& other) const { + return m_.cont_ < other.m_.cont_; + } + +private: + /* + * This structure derives from the comparison object in order to + * make use of the empty base class optimization if our comparison + * functor is an empty class (usual case). + * + * Wrapping up this member like this is better than deriving from + * the Compare object ourselves (there are some perverse edge cases + * involving virtual functions). + * + * More info: http://www.cantrip.org/emptyopt.html + */ + struct EBO : Compare { + explicit EBO(const Compare& c, const Allocator& alloc) + : Compare(c) + , cont_(alloc) + {} + ContainerT cont_; + } m_; +}; + +// Swap function that can be found using ADL. +template +inline void swap(sorted_vector_set& a, + sorted_vector_set& b) { + return a.swap(b); +} + +////////////////////////////////////////////////////////////////////// + +/** + * A sorted_vector_map is similar to a sorted_vector_set but stores + * pairs instead of single elements. + * + * @param class Key Key type + * @param class Value Value type + * @param class Compare Function that can compare key types and impose + * a strict weak ordering over them. 
+ * @param class Allocator allocation policy + * @param class GrowthPolicy policy object to control growth + * + * @author Aditya Agarwal + * @author Akhil Wable + * @author Jordan DeLong + */ +template, + class Allocator = std::allocator >, + class GrowthPolicy = void> +class sorted_vector_map + : boost::totally_ordered1< + sorted_vector_map + , detail::growth_policy_wrapper > +{ + typedef std::vector,Allocator> ContainerT; + + detail::growth_policy_wrapper& + get_growth_policy() { return *this; } + +public: + typedef Key key_type; + typedef Value mapped_type; + typedef std::pair value_type; + typedef Compare key_compare; + + struct value_compare + : std::binary_function + , private Compare + { + bool operator()(const value_type& a, const value_type& b) const { + return Compare::operator()(a.first, b.first); + } + + protected: + friend class sorted_vector_map; + explicit value_compare(const Compare& c) : Compare(c) {} + }; + + typedef typename ContainerT::pointer pointer; + typedef typename ContainerT::reference reference; + typedef typename ContainerT::const_reference const_reference; + typedef typename ContainerT::iterator iterator; + typedef typename ContainerT::const_iterator const_iterator; + typedef typename ContainerT::difference_type difference_type; + typedef typename ContainerT::size_type size_type; + typedef typename ContainerT::reverse_iterator reverse_iterator; + typedef typename ContainerT::const_reverse_iterator const_reverse_iterator; + + explicit sorted_vector_map(const Compare& comp = Compare(), + const Allocator& alloc = Allocator()) + : m_(value_compare(comp), alloc) + {} + + template + explicit sorted_vector_map( + InputIterator first, + InputIterator last, + const Compare& comp = Compare(), + const Allocator& alloc = Allocator()) + : m_(value_compare(comp), alloc) + { + insert(first, last); + } + + key_compare key_comp() const { return m_; } + value_compare value_comp() const { return m_; } + + iterator begin() { return m_.cont_.begin(); } + iterator end() { return m_.cont_.end(); } + const_iterator begin() const { return m_.cont_.begin(); } + const_iterator end() const { return m_.cont_.end(); } + reverse_iterator rbegin() { return m_.cont_.rbegin(); } + reverse_iterator rend() { return m_.cont_.rend(); } + const_reverse_iterator rbegin() const { return m_.cont_.rbegin(); } + const_reverse_iterator rend() const { return m_.cont_.rend(); } + + void clear() { return m_.cont_.clear(); } + size_type size() const { return m_.cont_.size(); } + size_type max_size() const { return m_.cont_.max_size(); } + bool empty() const { return m_.cont_.empty(); } + void reserve(size_type s) { return m_.cont_.reserve(s); } + size_type capacity() const { return m_.cont_.capacity(); } + + std::pair insert(const value_type& value) { + iterator it = lower_bound(value.first); + if (it == end() || value_comp()(value, *it)) { + it = get_growth_policy().increase_capacity(m_.cont_, it); + return std::make_pair(m_.cont_.insert(it, value), true); + } + return std::make_pair(it, false); + } + + std::pair insert(iterator hint, const value_type& value) { + return detail::insert_with_hint(*this, m_.cont_, hint, value, + get_growth_policy()); + } + + template + void insert(InputIterator first, InputIterator last) { + int d = detail::distance_if_multipass(first, last); + if (d != -1) { + m_.cont_.reserve(m_.cont_.size() + d); + } + for (; first != last; ++first) { + insert(end(), *first); + } + } + + size_type erase(const key_type& key) { + iterator it = find(key); + if (it == end()) { + return 0; + } 
+ m_.cont_.erase(it); + return 1; + } + + void erase(iterator it) { + m_.cont_.erase(it); + } + + void erase(iterator first, iterator last) { + m_.cont_.erase(first, last); + } + + iterator find(const key_type& key) { + iterator it = lower_bound(key); + if (it == end() || !key_comp()(key, it->first)) + return it; + return end(); + } + + const_iterator find(const key_type& key) const { + const_iterator it = lower_bound(key); + if (it == end() || !key_comp()(key, it->first)) + return it; + return end(); + } + + size_type count(const key_type& key) { + return find(key) == end() ? 0 : 1; + } + + iterator lower_bound(const key_type& key) { + return std::lower_bound(begin(), end(), key, + boost::bind(key_comp(), boost::bind(&value_type::first, _1), _2)); + } + + const_iterator lower_bound(const key_type& key) const { + return std::lower_bound(begin(), end(), key, + boost::bind(key_comp(), boost::bind(&value_type::first, _1), _2)); + } + + iterator upper_bound(const key_type& key) { + return std::upper_bound(begin(), end(), key, + boost::bind(key_comp(), _1, boost::bind(&value_type::first, _2))); + } + + const_iterator upper_bound(const key_type& key) const { + return std::upper_bound(begin(), end(), key, + boost::bind(key_comp(), _1, boost::bind(&value_type::first, _2))); + } + + std::pair equal_range(const key_type& key) { + // Note: std::equal_range can't be passed a functor that takes + // argument types different from the iterator value_type, so we + // have to do this. + iterator low = lower_bound(key); + iterator high = std::upper_bound(low, end(), key, + boost::bind(key_comp(), _1, boost::bind(&value_type::first, _2))); + return std::make_pair(low, high); + } + + std::pair + equal_range(const key_type& key) const { + return const_cast(this)->equal_range(key); + } + + // Nothrow as long as swap() on the Compare type is nothrow. + void swap(sorted_vector_map& o) { + using std::swap; // Allow ADL for swap(); fall back to std::swap(). + Compare& a = m_; + Compare& b = o.m_; + swap(a, b); + m_.cont_.swap(o.m_.cont_); + } + + mapped_type& operator[](const key_type& key) { + iterator it = lower_bound(key); + if (it == end() || key_comp()(key, it->first)) { + return insert(it, value_type(key, mapped_type())).first->second; + } + return it->second; + } + + bool operator==(const sorted_vector_map& other) const { + return m_.cont_ == other.m_.cont_; + } + + bool operator<(const sorted_vector_map& other) const { + return m_.cont_ < other.m_.cont_; + } + +private: + // This is to get the empty base optimization; see the comment in + // sorted_vector_set. + struct EBO : value_compare { + explicit EBO(const value_compare& c, const Allocator& alloc) + : value_compare(c) + , cont_(alloc) + {} + ContainerT cont_; + } m_; +}; + +// Swap function that can be found using ADL. +template +inline void swap(sorted_vector_map& a, + sorted_vector_map& b) { + return a.swap(b); +} + +////////////////////////////////////////////////////////////////////// + +} + +#endif + diff --git a/folly/test/AtomicHashArrayTest.cpp b/folly/test/AtomicHashArrayTest.cpp new file mode 100644 index 00000000..42fe1bc1 --- /dev/null +++ b/folly/test/AtomicHashArrayTest.cpp @@ -0,0 +1,95 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/AtomicHashArray.h" +#include "folly/Hash.h" +#include "folly/Conv.h" +#include + +using namespace std; +using namespace folly; + +template +pair createEntry(int i) { + return pair(to(folly::hash::jenkins_rev_mix32(i) % 1000), + to(i + 3)); +} + +template +void testMap() { + typedef AtomicHashArray MyArr; + auto arr = MyArr::create(150); + map ref; + for (int i = 0; i < 100; ++i) { + auto e = createEntry(i); + auto ret = arr->insert(e); + EXPECT_EQ(!ref.count(e.first), ret.second); // succeed iff not in ref + ref.insert(e); + EXPECT_EQ(ref.size(), arr->size()); + if (ret.first == arr->end()) { + EXPECT_FALSE("AHA should not have run out of space."); + continue; + } + EXPECT_EQ(e.first, ret.first->first); + EXPECT_EQ(ref.find(e.first)->second, ret.first->second); + } + + for (int i = 125; i > 0; i -= 10) { + auto e = createEntry(i); + auto ret = arr->erase(e.first); + auto refRet = ref.erase(e.first); + EXPECT_EQ(ref.size(), arr->size()); + EXPECT_EQ(refRet, ret); + } + + for (int i = 155; i > 0; i -= 10) { + auto e = createEntry(i); + auto ret = arr->insert(e); + auto refRet = ref.insert(e); + EXPECT_EQ(ref.size(), arr->size()); + EXPECT_EQ(*refRet.first, *ret.first); + EXPECT_EQ(refRet.second, ret.second); + } + + for (const auto& e : ref) { + auto ret = arr->find(e.first); + if (ret == arr->end()) { + EXPECT_FALSE("Key was not in AHA"); + continue; + } + EXPECT_EQ(e.first, ret->first); + EXPECT_EQ(e.second, ret->second); + } +} + +TEST(Aha, InsertErase_i32_i32) { + testMap(); +} +TEST(Aha, InsertErase_i64_i32) { + testMap(); +} +TEST(Aha, InsertErase_i64_i64) { + testMap(); +} +TEST(Aha, InsertErase_i32_i64) { + testMap(); +} +TEST(Aha, InsertErase_i32_str) { + testMap(); +} +TEST(Aha, InsertErase_i64_str) { + testMap(); +} diff --git a/folly/test/AtomicHashMapTest.cpp b/folly/test/AtomicHashMapTest.cpp new file mode 100644 index 00000000..d869c1cf --- /dev/null +++ b/folly/test/AtomicHashMapTest.cpp @@ -0,0 +1,807 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/AtomicHashMap.h" + +#include +#include +#include +#include +#include + +#include "folly/Benchmark.h" +#include "folly/Conv.h" + +using std::vector; +using std::string; +using folly::AtomicHashMap; +using folly::AtomicHashArray; + +// Tunables: +DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction."); +DEFINE_double(maxLoadFactor, 0.80, "Max before growth."); +DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests."); +DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks."); + +const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor; +const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF. + +static int64_t nowInUsec() { + timeval tv; + gettimeofday(&tv, 0); + return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec; +} + +TEST(Ahm, BasicStrings) { + typedef AtomicHashMap AHM; + AHM myMap(1024); + EXPECT_TRUE(myMap.begin() == myMap.end()); + + for (int i = 0; i < 100; ++i) { + myMap.insert(make_pair(i, folly::to(i))); + } + for (int i = 0; i < 100; ++i) { + EXPECT_EQ(myMap.find(i)->second, folly::to(i)); + } + + myMap.insert(std::make_pair(999, "A")); + myMap.insert(std::make_pair(999, "B")); + EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten + myMap.find(999)->second = "B"; + myMap.find(999)->second = "C"; + EXPECT_EQ(myMap.find(999)->second, "C"); + EXPECT_EQ(myMap.find(999)->first, 999); +} + +typedef int32_t KeyT; +typedef int64_t KeyTBig; +typedef int32_t ValueT; + +typedef AtomicHashMap AHMapT; +typedef AHMapT::value_type RecordT; +typedef AtomicHashArray AHArrayT; + +AHArrayT::Config config; +static AHArrayT::SmartPtr globalAHA(nullptr); +static std::unique_ptr globalAHM; + +// Generate a deterministic value based on an input key +static int genVal(int key) { + return key / 3; +} + +TEST(Ahm, grow) { + VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " << + sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes."; + int numEntries = 10000; + float sizeFactor = 0.46; + + std::unique_ptr m(new AHMapT(int(numEntries * sizeFactor), config)); + + // load map - make sure we succeed and the index is accurate + bool success = true; + for (uint64_t i = 0; i < numEntries; i++) { + auto ret = m->insert(RecordT(i, genVal(i))); + success &= ret.second; + success &= (m->findAt(ret.first.getIndex())->second == genVal(i)); + } + // Overwrite vals to make sure there are no dups + // Every insert should fail because the keys are already in the map. 
+ success = true; + for (uint64_t i = 0; i < numEntries; i++) { + auto ret = m->insert(RecordT(i, genVal(i * 2))); + success &= (ret.second == false); // fail on collision + success &= (ret.first->second == genVal(i)); // return the previous value + success &= (m->findAt(ret.first.getIndex())->second == genVal(i)); + } + EXPECT_TRUE(success); + + // check correctness + size_t cap = m->capacity(); + ValueT val; + EXPECT_GT(m->numSubMaps(), 1); // make sure we grew + success = true; + EXPECT_EQ(m->size(), numEntries); + for (int i = 0; i < numEntries; i++) { + success &= (m->find(i)->second == genVal(i)); + } + EXPECT_TRUE(success); + + // Check findAt + success = true; + KeyT key(0); + AHMapT::const_iterator retIt; + for (uint64_t i = 0; i < numEntries; i++) { + retIt = m->find(i); + retIt = m->findAt(retIt.getIndex()); + success &= (retIt->second == genVal(i)); + success &= (retIt->first == i); + } + EXPECT_TRUE(success); + + // Try modifying value + m->find(8)->second = 5309; + EXPECT_EQ(m->find(8)->second, 5309); + + // check clear() + m->clear(); + success = true; + for (uint64_t i = 0; i < numEntries / 2; i++) { + success &= m->insert(RecordT(i, genVal(i))).second; + } + EXPECT_TRUE(success); + EXPECT_EQ(m->size(), numEntries / 2); +} + +TEST(Ahm, iterator) { + int numEntries = 10000; + float sizeFactor = .46; + std::unique_ptr m(new AHMapT(int(numEntries * sizeFactor), config)); + + // load map - make sure we succeed and the index is accurate + for (uint64_t i = 0; i < numEntries; i++) { + m->insert(RecordT(i, genVal(i))); + } + + bool success = true; + int count = 0; + FOR_EACH(it, *m) { + success &= (it->second == genVal(it->first)); + ++count; + } + EXPECT_TRUE(success); + EXPECT_EQ(count, numEntries); +} + +class Counters { +private: + // NOTE: Unfortunately can't currently put a std::atomic in + // the value in ahm since it doesn't support non-copyable but + // move-constructible value types yet. + AtomicHashMap ahm; + +public: + explicit Counters(size_t numCounters) : ahm(numCounters) {} + + void increment(int64_t obj_id) { + auto ret = ahm.insert(std::make_pair(obj_id, 1)); + if (!ret.second) { + // obj_id already exists, increment count + __sync_fetch_and_add(&ret.first->second, 1); + } + } + + int64_t getValue(int64_t obj_id) { + auto ret = ahm.find(obj_id); + return ret != ahm.end() ? ret->second : 0; + } + + // export the counters without blocking increments + string toString() { + string ret = "{\n"; + ret.reserve(ahm.size() * 32); + for (const auto& e : ahm) { + ret += folly::to( + " [", e.first, ":", e.second, "]\n"); + } + ret += "}\n"; + return ret; + } +}; + +// If you get an error "terminate called without an active exception", there +// might be too many threads getting created - decrease numKeys and/or mult. 
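+//
+// (With the values below -- numKeys = 10 and mult = 10 -- the test spawns
+// key * mult threads for each key in [1, numKeys), i.e. 10 * (1 + ... + 9)
+// = 450 threads in total, each incrementing its key once, so getValue(key)
+// should end up at exactly key * mult.)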
+TEST(Ahm, counter) {
+  const int numKeys = 10;
+  const int mult = 10;
+  Counters c(numKeys);
+  vector<int64_t> keys;
+  FOR_EACH_RANGE(i, 1, numKeys) {
+    keys.push_back(i);
+  }
+  vector<std::thread> threads;
+  for (auto key : keys) {
+    FOR_EACH_RANGE(i, 0, key * mult) {
+      threads.push_back(std::thread([&, key] { c.increment(key); }));
+    }
+  }
+  for (auto& t : threads) {
+    t.join();
+  }
+  string str = c.toString();
+  for (auto key : keys) {
+    int val = key * mult;
+    EXPECT_EQ(val, c.getValue(key));
+    EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
+  }
+}
+
+class Integer {
+
+ public:
+  explicit Integer(KeyT v = 0) : v_(v) {}
+
+  Integer& operator=(const Integer& a) {
+    static bool throwException_ = false;
+    throwException_ = !throwException_;
+    if (throwException_) {
+      throw 1;
+    }
+    v_ = a.v_;
+    return *this;
+  }
+
+  bool operator==(const Integer& a) const { return v_ == a.v_; }
+
+ private:
+  KeyT v_;
+};
+
+TEST(Ahm, map_exception_safety) {
+  typedef AtomicHashMap<KeyT,Integer> MyMapT;
+
+  int numEntries = 10000;
+  float sizeFactor = 0.46;
+  std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
+
+  bool success = true;
+  int count = 0;
+  for (int i = 0; i < numEntries; i++) {
+    try {
+      m->insert(i, Integer(genVal(i)));
+      success &= (m->find(i)->second == Integer(genVal(i)));
+      ++count;
+    } catch (...) {
+      success &= !m->count(i);
+    }
+  }
+  EXPECT_EQ(count, m->size());
+  EXPECT_TRUE(success);
+}
+
+TEST(Ahm, basicErase) {
+  int numEntries = 3000;
+
+  std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
+  // Iterate filling up the map and deleting all keys a few times
+  // to test more than one subMap.
+  for (int iterations = 0; iterations < 4; ++iterations) {
+    // Testing insertion of keys
+    bool success = true;
+    for (uint64_t i = 0; i < numEntries; ++i) {
+      success &= !(s->count(i));
+      auto ret = s->insert(RecordT(i, i));
+      success &= s->count(i);
+      success &= ret.second;
+    }
+    EXPECT_TRUE(success);
+    EXPECT_EQ(s->size(), numEntries);
+
+    // Delete every key in the map and verify that the key is gone and the
+    // size is correct.
+    success = true;
+    for (uint64_t i = 0; i < numEntries; ++i) {
+      success &= s->erase(i);
+      success &= (s->size() == numEntries - 1 - i);
+      success &= !(s->count(i));
+      success &= !(s->erase(i));
+    }
+    EXPECT_TRUE(success);
+  }
+  VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
+}
+
+namespace {
+
+inline KeyT randomizeKey(int key) {
+  // We deterministically randomize the key to more accurately simulate
+  // real-world usage, and to avoid pathological performance patterns (e.g.
+  // those related to __gnu_cxx::hash<int>()(1) == 1).
+  //
+  // Use a hash function we don't normally use for ints to avoid interactions.
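+  //
+  // (A sketch of the problem an identity-style hash causes here: these
+  // tests insert sequential keys, which an identity hash would map to
+  // consecutive slots, producing long probe chains; mixing the bits
+  // first spreads the keys across the whole array.)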
+ return folly::hash::jenkins_rev_mix32(key); +} + +int numOpsPerThread = 0; + +void* insertThread(void* jj) { + int64_t j = (int64_t) jj; + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i + j * numOpsPerThread); + globalAHM->insert(key, genVal(key)); + } + return NULL; +} + +void* insertThreadArr(void* jj) { + int64_t j = (int64_t) jj; + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i + j * numOpsPerThread); + globalAHA->insert(std::make_pair(key, genVal(key))); + } + return NULL; +} + +std::atomic runThreadsCreatedAllThreads; +void runThreads(void *(*thread)(void*), int numThreads, void **statuses) { + folly::BenchmarkSuspender susp; + runThreadsCreatedAllThreads.store(false); + vector threadIds; + for (int64_t j = 0; j < numThreads; j++) { + pthread_t tid; + if (pthread_create(&tid, NULL, thread, (void*) j) != 0) { + LOG(ERROR) << "Could not start thread"; + } else { + threadIds.push_back(tid); + } + } + susp.dismiss(); + + runThreadsCreatedAllThreads.store(true); + for (int i = 0; i < threadIds.size(); ++i) { + pthread_join(threadIds[i], statuses == NULL ? NULL : &statuses[i]); + } +} + +void runThreads(void *(*thread)(void*)) { + runThreads(thread, FLAGS_numThreads, NULL); +} + +} + +TEST(Ahm, collision_test) { + const int numInserts = 1000000 / 4; + + // Doing the same number on each thread so we collide. + numOpsPerThread = numInserts; + + float sizeFactor = 0.46; + int entrySize = sizeof(KeyT) + sizeof(ValueT); + VLOG(1) << "Testing " << numInserts << " unique " << entrySize << + " Byte entries replicated in " << FLAGS_numThreads << + " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor."; + + globalAHM.reset(new AHMapT(int(numInserts * sizeFactor), config)); + + size_t sizeInit = globalAHM->capacity(); + VLOG(1) << " Initial capacity: " << sizeInit; + + double start = nowInUsec(); + runThreads([](void*) -> void* { // collisionInsertThread + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i); + globalAHM->insert(key, genVal(key)); + } + return nullptr; + }); + double elapsed = nowInUsec() - start; + + size_t finalCap = globalAHM->capacity(); + size_t sizeAHM = globalAHM->size(); + VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads << + " duplicate inserts (atomic)."; + VLOG(1) << " Final capacity: " << finalCap << " in " << + globalAHM->numSubMaps() << " sub maps (" << + sizeAHM * 100 / finalCap << "% load factor, " << + (finalCap - sizeInit) * 100 / sizeInit << "% growth)."; + + // check correctness + EXPECT_EQ(sizeAHM, numInserts); + bool success = true; + ValueT val; + for (int i = 0; i < numInserts; ++i) { + KeyT key = randomizeKey(i); + success &= (globalAHM->find(key)->second == genVal(key)); + } + EXPECT_TRUE(success); + + // check colliding finds + start = nowInUsec(); + runThreads([](void*) -> void* { // collisionFindThread + KeyT key(0); + for (int i = 0; i < numOpsPerThread; ++i) { + globalAHM->find(key); + } + return nullptr; + }); + + elapsed = nowInUsec() - start; + + VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads << + " duplicate finds (atomic)."; +} + +namespace { + +const int kInsertPerThread = 100000; +int raceFinalSizeEstimate; + +void* raceIterateThread(void* jj) { + int64_t j = (int64_t) jj; + int count = 0; + + AHMapT::iterator it = globalAHM->begin(); + AHMapT::iterator end = globalAHM->end(); + for (; it != end; ++it) { + ++count; + if (count > raceFinalSizeEstimate) { + EXPECT_FALSE("Infinite loop in iterator."); + return NULL; + } + } + return 
NULL; +} + +void* raceInsertRandomThread(void* jj) { + int64_t j = (int64_t) jj; + for (int i = 0; i < kInsertPerThread; ++i) { + KeyT key = rand(); + globalAHM->insert(key, genVal(key)); + } + return NULL; +} + +} + +// Test for race conditions when inserting and iterating at the same time and +// creating multiple submaps. +TEST(Ahm, race_insert_iterate_thread_test) { + const int kInsertThreads = 20; + const int kIterateThreads = 20; + raceFinalSizeEstimate = kInsertThreads * kInsertPerThread; + + VLOG(1) << "Testing iteration and insertion with " << kInsertThreads + << " threads inserting and " << kIterateThreads << " threads iterating."; + + globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config)); + + vector threadIds; + for (int64_t j = 0; j < kInsertThreads + kIterateThreads; j++) { + pthread_t tid; + void *(*thread)(void*) = + (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread); + if (pthread_create(&tid, NULL, thread, (void*) j) != 0) { + LOG(ERROR) << "Could not start thread"; + } else { + threadIds.push_back(tid); + } + } + for (int i = 0; i < threadIds.size(); ++i) { + pthread_join(threadIds[i], NULL); + } + VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps"; + VLOG(1) << "Final size of map " << globalAHM->size(); +} + +namespace { + +const int kTestEraseInsertions = 200000; +std::atomic insertedLevel; + +void* testEraseInsertThread(void*) { + for (int i = 0; i < kTestEraseInsertions; ++i) { + KeyT key = randomizeKey(i); + globalAHM->insert(key, genVal(key)); + insertedLevel.store(i, std::memory_order_release); + } + insertedLevel.store(kTestEraseInsertions, std::memory_order_release); + return NULL; +} + +void* testEraseEraseThread(void*) { + for (int i = 0; i < kTestEraseInsertions; ++i) { + /* + * Make sure that we don't get ahead of the insert thread, because + * part of the condition for this unit test succeeding is that the + * map ends up empty. + * + * Note, there is a subtle case here when a new submap is + * allocated: the erasing thread might get 0 from count(key) + * because it hasn't seen numSubMaps_ update yet. To avoid this + * race causing problems for the test (it's ok for real usage), we + * lag behind the inserter by more than just element. + */ + const int lag = 10; + int currentLevel; + do { + currentLevel = insertedLevel.load(std::memory_order_acquire); + if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1; + } while (currentLevel - lag < i); + + KeyT key = randomizeKey(i); + while (globalAHM->count(key)) { + if (globalAHM->erase(key)) { + break; + } + } + } + return NULL; +} + +} + +// Here we have a single thread inserting some values, and several threads +// racing to delete the values in the order they were inserted. +TEST(Ahm, thread_erase_insert_race) { + const int kInsertThreads = 1; + const int kEraseThreads = 10; + + VLOG(1) << "Testing insertion and erase with " << kInsertThreads + << " thread inserting and " << kEraseThreads << " threads erasing."; + + globalAHM.reset(new AHMapT(kTestEraseInsertions / 4, config)); + + vector threadIds; + for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) { + pthread_t tid; + void *(*thread)(void*) = + (j < kInsertThreads ? 
testEraseInsertThread : testEraseEraseThread); + if (pthread_create(&tid, NULL, thread, (void*) j) != 0) { + LOG(ERROR) << "Could not start thread"; + } else { + threadIds.push_back(tid); + } + } + for (int i = 0; i < threadIds.size(); i++) { + pthread_join(threadIds[i], NULL); + } + + EXPECT_TRUE(globalAHM->empty()); + EXPECT_EQ(globalAHM->size(), 0); + + VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps"; +} + +// Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value. +typedef AtomicHashArray AHA; +AHA::Config configRace; +auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace); +void* atomicHashArrayInsertRaceThread(void* j) { + AHA* arr = atomicHashArrayInsertRaceArray.get(); + uintptr_t numInserted = 0; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < 2; i++) { + if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) { + numInserted++; + } + } + pthread_exit((void *) numInserted); +} +TEST(Ahm, atomic_hash_array_insert_race) { + AHA* arr = atomicHashArrayInsertRaceArray.get(); + int numIterations = 50000, FLAGS_numThreads = 4; + void* statuses[FLAGS_numThreads]; + for (int i = 0; i < numIterations; i++) { + arr->clear(); + runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses); + EXPECT_GE(arr->size(), 1); + for (int j = 0; j < FLAGS_numThreads; j++) { + EXPECT_EQ(arr->size(), uintptr_t(statuses[j])); + } + } +} + +namespace { + +void loadGlobalAha() { + std::cout << "loading global AHA with " << FLAGS_numThreads + << " threads...\n"; + uint64_t start = nowInUsec(); + globalAHA = AHArrayT::create(maxBMElements, config); + numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads; + CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) << + "kNumThreads must evenly divide kNumInserts."; + runThreads(insertThreadArr); + uint64_t elapsed = nowInUsec() - start; + std::cout << " took " << elapsed / 1000 << " ms (" << + (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n"; + EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements); +} + +void loadGlobalAhm() { + std::cout << "loading global AHM with " << FLAGS_numThreads + << " threads...\n"; + uint64_t start = nowInUsec(); + globalAHM.reset(new AHMapT(maxBMElements, config)); + numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads; + runThreads(insertThread); + uint64_t elapsed = nowInUsec() - start; + std::cout << " took " << elapsed / 1000 << " ms (" << + (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n"; + EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements); +} + +} + +BENCHMARK(st_aha_find, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + for (int i = 0; i < iters; i++) { + KeyT key = randomizeKey(i); + folly::doNotOptimizeAway(globalAHA->find(key)->second); + } +} + +BENCHMARK(st_ahm_find, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + for (int i = 0; i < iters; i++) { + KeyT key = randomizeKey(i); + folly::doNotOptimizeAway(globalAHM->find(key)->second); + } +} + +BENCHMARK_DRAW_LINE() + +BENCHMARK(mt_ahm_miss, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = i + j * numOpsPerThread * 100; + folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end()); + } + return nullptr; + }); +} + +BENCHMARK(st_ahm_miss, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + for (int i = 0; i < iters; i++) { + KeyT key = 
randomizeKey(i + iters * 100); + folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end()); + } +} + +BENCHMARK(mt_ahm_find_insert_mix, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + if (i % 128) { // ~1% insert mix + KeyT key = randomizeKey(i + j * numOpsPerThread); + folly::doNotOptimizeAway(globalAHM->find(key)->second); + } else { + KeyT key = randomizeKey(i + j * numOpsPerThread * 100); + globalAHM->insert(key, genVal(key)); + } + } + return nullptr; + }); +} + +BENCHMARK(mt_aha_find, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i + j * numOpsPerThread); + folly::doNotOptimizeAway(globalAHA->find(key)->second); + } + return nullptr; + }); +} + +BENCHMARK(mt_ahm_find, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i + j * numOpsPerThread); + folly::doNotOptimizeAway(globalAHM->find(key)->second); + } + return nullptr; + }); +} + +KeyT k; +BENCHMARK(st_baseline_modulus_and_random, iters) { + for (int i = 0; i < iters; ++i) { + k = randomizeKey(i) % iters; + } +} + +// insertions go last because they reset the map + +BENCHMARK(mt_ahm_insert, iters) { + BENCHMARK_SUSPEND { + globalAHM.reset(new AHMapT(int(iters * LF), config)); + numOpsPerThread = iters / FLAGS_numThreads; + } + runThreads(insertThread); +} + +BENCHMARK(st_ahm_insert, iters) { + folly::BenchmarkSuspender susp; + std::unique_ptr ahm(new AHMapT(int(iters * LF), config)); + susp.dismiss(); + + for (int i = 0; i < iters; i++) { + KeyT key = randomizeKey(i); + ahm->insert(key, genVal(key)); + } +} + +void benchmarkSetup() { + config.maxLoadFactor = FLAGS_maxLoadFactor; + configRace.maxLoadFactor = 0.5; + int numCores = sysconf(_SC_NPROCESSORS_ONLN); + loadGlobalAha(); + loadGlobalAhm(); + string numIters = folly::to( + std::min(1000000, int(FLAGS_numBMElements))); + + google::SetCommandLineOptionWithMode( + "bm_max_iters", numIters.c_str(), google::SET_FLAG_IF_DEFAULT + ); + google::SetCommandLineOptionWithMode( + "bm_min_iters", numIters.c_str(), google::SET_FLAG_IF_DEFAULT + ); + string numCoresStr = folly::to(numCores); + google::SetCommandLineOptionWithMode( + "numThreads", numCoresStr.c_str(), google::SET_FLAG_IF_DEFAULT + ); + + std::cout << "\nRunning AHM benchmarks on machine with " << numCores + << " logical cores.\n" + " num elements per map: " << FLAGS_numBMElements << "\n" + << " num threads for mt tests: " << FLAGS_numThreads << "\n" + << " AHM load factor: " << FLAGS_targetLoadFactor << "\n\n"; +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret && FLAGS_benchmark) { + benchmarkSetup(); + folly::runBenchmarks(); + } + return ret; +} + +/* +Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled + (12 physical cores, 12 MB cache, 72 GB RAM) + +Running AHM benchmarks on machine with 24 logical cores. 
+  num elements per map: 12000000
+  num threads for mt tests: 24
+  AHM load factor: 0.75
+
+Benchmark                          Iters   Total t    t/iter  iter/sec
+------------------------------------------------------------------------------
+Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
+*       BM_mt_aha_find           1000000  7.767 ms  7.767 ns   122.8 M
+ +0.81% BM_mt_ahm_find           1000000   7.83 ms   7.83 ns   121.8 M
+------------------------------------------------------------------------------
+Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
+*       BM_st_aha_find           1000000  57.83 ms  57.83 ns   16.49 M
+ +77.9% BM_st_ahm_find           1000000  102.9 ms  102.9 ns    9.27 M
+------------------------------------------------------------------------------
+BM_mt_ahm_miss                   1000000  2.937 ms  2.937 ns   324.7 M
+BM_st_ahm_miss                   1000000  164.2 ms  164.2 ns   5.807 M
+BM_mt_ahm_find_insert_mix        1000000  8.797 ms  8.797 ns   108.4 M
+BM_mt_ahm_insert                 1000000  17.39 ms  17.39 ns   54.83 M
+BM_st_ahm_insert                 1000000  106.8 ms  106.8 ns    8.93 M
+BM_st_baseline_modulus_and_rando 1000000  6.223 ms  6.223 ns   153.2 M
+*/
diff --git a/folly/test/BenchmarkTest.cpp b/folly/test/BenchmarkTest.cpp
new file mode 100644
index 00000000..4e87485f
--- /dev/null
+++ b/folly/test/BenchmarkTest.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/Benchmark.h"
+#include "folly/Foreach.h"
+#include "folly/String.h"
+#include <vector>
+using namespace folly;
+using namespace std;
+
+void fun() {
+  static double x = 1;
+  ++x;
+  doNotOptimizeAway(x);
+}
+BENCHMARK(bmFun) { fun(); }
+BENCHMARK(bmRepeatedFun, n) {
+  FOR_EACH_RANGE (i, 0, n) {
+    fun();
+  }
+}
+BENCHMARK_DRAW_LINE()
+
+BENCHMARK(gun) {
+  static double x = 1;
+  x *= 2000;
+  doNotOptimizeAway(x);
+}
+
+BENCHMARK_DRAW_LINE()
+
+BENCHMARK(baselinevector) {
+  vector<int> v;
+
+  BENCHMARK_SUSPEND {
+    v.resize(1000);
+  }
+
+  FOR_EACH_RANGE (i, 0, 100) {
+    v.push_back(42);
+  }
+}
+
+BENCHMARK_RELATIVE(bmVector) {
+  vector<int> v;
+  FOR_EACH_RANGE (i, 0, 100) {
+    v.resize(v.size() + 1, 42);
+  }
+}
+
+BENCHMARK_DRAW_LINE()
+
+BENCHMARK(superslow) {
+  sleep(1);
+}
+
+int main(int argc, char** argv) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  runBenchmarks();
+  runBenchmarksOnFlag();
+}
diff --git a/folly/test/BitIteratorTest.cpp b/folly/test/BitIteratorTest.cpp
new file mode 100644
index 00000000..8d2fe521
--- /dev/null
+++ b/folly/test/BitIteratorTest.cpp
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "folly/Bits.h" +#include "folly/Benchmark.h" + +#include +#include +#include +#include +#include + +#include + +using namespace folly; +using namespace folly::bititerator_detail; + +namespace { + +template +void checkIt(INT exp, IT& it) { + typedef typename std::make_unsigned::type utype; + size_t bits = std::numeric_limits::digits; + utype uexp = exp; + for (size_t i = 0; i < bits; ++i) { + bool e = uexp & 1; + EXPECT_EQ(e, *it++); + uexp >>= 1; + } +} + +template +void checkRange(INT exp, IT begin, IT end) { + typedef typename std::make_unsigned::type utype; + utype uexp = exp; + size_t i = 0; + auto bitEnd = makeBitIterator(end); + for (BitIterator it = makeBitIterator(begin); it != bitEnd; ++it, ++i) { + bool e = uexp & 1; + EXPECT_EQ(e, *it); + uexp >>= 1; + } +} + +} // namespace + +TEST(BitIterator, Simple) { + std::vector v; + v.push_back(0x10); + v.push_back(0x42); + auto bi(makeBitIterator(v.begin())); + checkIt(0x10, bi); + checkIt(0x42, bi); + checkRange(0x0000004200000010ULL, v.begin(), v.end()); + + v[0] = 0; + bi = v.begin(); + *bi++ = true; // 1 + *bi++ = false; + *bi++ = true; // 4 + *bi++ = false; + *bi++ = false; + *bi++ = true; // 32 + *++bi = true; // 128 (note pre-increment) + + EXPECT_EQ(165, v[0]); +} + +TEST(BitIterator, Const) { + std::vector v; + v.push_back(0x10); + v.push_back(0x42); + auto bi(makeBitIterator(v.cbegin())); + checkIt(0x10, bi); + checkIt(0x42, bi); +} + +namespace { + +template +BitIterator simpleFFS(BitIterator begin, + BitIterator end) { + return std::find(begin, end, true); +} + +template +void runFFSTest(FFS fn) { + static const size_t bpb = 8 * sizeof(uint64_t); + std::vector data; + for (size_t nblocks = 1; nblocks <= 3; ++nblocks) { + size_t nbits = nblocks * bpb; + data.resize(nblocks); + auto begin = makeBitIterator(data.cbegin()); + auto end = makeBitIterator(data.cend()); + EXPECT_EQ(nbits, end - begin); + EXPECT_FALSE(begin == end); + + // Try every possible combination of first bit set (including none), + // start bit, end bit + for (size_t firstSet = 0; firstSet <= nbits; ++firstSet) { + data.assign(nblocks, 0); + if (firstSet) { + size_t b = firstSet - 1; + data[b / bpb] |= (1ULL << (b % bpb)); + } + for (size_t startBit = 0; startBit <= nbits; ++startBit) { + for (size_t endBit = startBit; endBit <= nbits; ++endBit) { + auto p = begin + startBit; + auto q = begin + endBit; + p = fn(p, q); + if (firstSet < startBit + 1 || firstSet >= endBit + 1) { + EXPECT_EQ(endBit, p - begin) + << " firstSet=" << firstSet << " startBit=" << startBit + << " endBit=" << endBit << " nblocks=" << nblocks; + } else { + EXPECT_EQ(firstSet - 1, p - begin) + << " firstSet=" << firstSet << " startBit=" << startBit + << " endBit=" << endBit << " nblocks=" << nblocks; + } + } + } + } + } +} + +void runSimpleFFSTest(int iters) { + auto fn = simpleFFS::const_iterator>; + while (iters--) { + runFFSTest(fn); + } +} + +void runRealFFSTest(int iters) { + auto fn = findFirstSet::const_iterator>; + while (iters--) { + runFFSTest(fn); + } +} + +} + +TEST(BitIterator, SimpleFindFirstSet) { + runSimpleFFSTest(1); +} + +TEST(BitIterator, FindFirstSet) { + runRealFFSTest(1); +} + +BENCHMARK(SimpleFFSTest, iters) { + runSimpleFFSTest(iters); +} +BENCHMARK(RealFFSTest, iters) { + runRealFFSTest(iters); +} + +/* --bm_min_iters=10 --bm_max_iters=100 + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +runSimpleFFSTest 10 4.82 s 482 ms 2.075 +runRealFFSTest 19 2.011 s 105.9 ms 9.447 + 
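+
+(In other words, on this machine the word-at-a-time findFirstSet walk is
+roughly 4.5x faster per full test pass than the bit-by-bit std::find
+baseline.)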
+*/ + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret && FLAGS_benchmark) { + folly::runBenchmarks(); + } + return ret; +} diff --git a/folly/test/BitsTest.cpp b/folly/test/BitsTest.cpp new file mode 100644 index 00000000..8f08a9e5 --- /dev/null +++ b/folly/test/BitsTest.cpp @@ -0,0 +1,157 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// @author Tudor Bosman (tudorb@fb.com) + +#include +#include "folly/Bits.h" +#include "folly/Benchmark.h" +#include + +using namespace folly; + +namespace { + +template +void testFFS() { + EXPECT_EQ(0, findFirstSet(static_cast(0))); + size_t bits = std::numeric_limits< + typename std::make_unsigned::type>::digits; + for (size_t i = 0; i < bits; i++) { + INT v = (static_cast(1) << (bits - 1)) | + (static_cast(1) << i); + EXPECT_EQ(i+1, findFirstSet(v)); + } +} + +template +unsigned int findLastSetPortable(INT x) { + return detail::findLastSetPortable( + static_cast::type>(x)); +} + +template +void testFLS() { + typedef typename std::make_unsigned::type UINT; + EXPECT_EQ(0, findLastSet(static_cast(0))); + size_t bits = std::numeric_limits::digits; + for (size_t i = 0; i < bits; i++) { + INT v1 = static_cast(1) << i; + EXPECT_EQ(i + 1, findLastSet(v1)); + EXPECT_EQ(i + 1, findLastSetPortable(v1)); + + INT v2 = (static_cast(1) << i) - 1; + EXPECT_EQ(i, findLastSet(v2)); + EXPECT_EQ(i, findLastSetPortable(v2)); + } +} + +} // namespace + +TEST(Bits, FindFirstSet) { + testFFS(); + testFFS(); + testFFS(); + testFFS(); + testFFS(); + testFFS(); + testFFS(); + testFFS(); + testFFS(); + testFFS(); + testFFS(); +} + +TEST(Bits, FindLastSet) { + testFLS(); + testFLS(); + testFLS(); + testFLS(); + testFLS(); + testFLS(); + testFLS(); + testFLS(); + testFLS(); + testFLS(); + testFLS(); +} + +#define testPowTwo(nextPowTwoFunc) { \ + EXPECT_EQ(1, nextPowTwoFunc(0u)); \ + EXPECT_EQ(1, nextPowTwoFunc(1u)); \ + EXPECT_EQ(2, nextPowTwoFunc(2u)); \ + EXPECT_EQ(4, nextPowTwoFunc(3u)); \ + EXPECT_EQ(4, nextPowTwoFunc(4u)); \ + EXPECT_EQ(8, nextPowTwoFunc(5u)); \ + EXPECT_EQ(8, nextPowTwoFunc(6u)); \ + EXPECT_EQ(8, nextPowTwoFunc(7u)); \ + EXPECT_EQ(8, nextPowTwoFunc(8u)); \ + EXPECT_EQ(16, nextPowTwoFunc(9u)); \ + EXPECT_EQ(16, nextPowTwoFunc(13u)); \ + EXPECT_EQ(16, nextPowTwoFunc(16u)); \ + EXPECT_EQ(512, nextPowTwoFunc(510u)); \ + EXPECT_EQ(512, nextPowTwoFunc(511u)); \ + EXPECT_EQ(512, nextPowTwoFunc(512u)); \ + EXPECT_EQ(1024, nextPowTwoFunc(513u)); \ + EXPECT_EQ(1024, nextPowTwoFunc(777u)); \ + EXPECT_EQ(1ul << 31, nextPowTwoFunc((1ul << 31) - 1)); \ + EXPECT_EQ(1ul << 32, nextPowTwoFunc((1ul << 32) - 1)); \ + EXPECT_EQ(1ull << 63, nextPowTwoFunc((1ull << 62) + 1)); \ +} + + +#ifdef __GNUC__ + +TEST(Bits, nextPowTwoClz) { + testPowTwo(nextPowTwo); +} + +int x; // prevent the loop from getting optimized away +BENCHMARK(nextPowTwoClz, iters) { + x = folly::nextPowTwo(iters); +} + +#endif + 
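+// For reference, the clz-based variant benchmarked above boils down to
+// rounding up with a count-leading-zeros instruction. A minimal sketch of the
+// idea (not folly's exact implementation; see folly/Bits.h for that):
+//
+//   uint64_t nextPowTwoSketch(uint64_t v) {
+//     // __builtin_clzll(0) is undefined, so v <= 1 is special-cased.
+//     return v <= 1 ? 1 : uint64_t(1) << (64 - __builtin_clzll(v - 1));
+//   }
+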
+TEST(Bits, nextPowTwoPortable) { + testPowTwo(detail::nextPowTwoPortable); +} + +BENCHMARK(nextPowTwoPortable, iters) { + x = detail::nextPowTwoPortable(iters); +} + +BENCHMARK_DRAW_LINE(); + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret && FLAGS_benchmark) { + folly::runBenchmarks(); + } + return ret; +} + +/* +Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled + (12 physical cores, 12 MB cache, 72 GB RAM) + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +* nextPowTwoClz 1000000 1.659 ms 1.659 ns 574.8 M + +66.8% nextPowTwoPortable 1000000 2.767 ms 2.767 ns 344.7 M +*/ diff --git a/folly/test/ConcurrentSkipListBenchmark.cpp b/folly/test/ConcurrentSkipListBenchmark.cpp new file mode 100644 index 00000000..e2fae29b --- /dev/null +++ b/folly/test/ConcurrentSkipListBenchmark.cpp @@ -0,0 +1,696 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// @author: Xin Liu + +#include +#include + +#include +#include + +#include +#include +#include "folly/Benchmark.h" +#include "folly/ConcurrentSkipList.h" +#include "folly/Hash.h" +#include "folly/RWSpinLock.h" + + +DEFINE_int32(num_threads, 12, "num concurrent threads to test"); + +// In some case, we may want to test worker threads operating on multiple +// lists. For example in search, not all threads are visiting the same posting +// list, but for the ones with some popular terms, they do get multiple +// visitors at the same time. 
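+// Concretely, --num_sets=4 spreads the std::set operations (and their
+// RWSpinLocks) across four independent sets to model that sharding, while the
+// skip-list side always operates on one shared list (see ConcurrentAccessData
+// below).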
+DEFINE_int32(num_sets, 1, "num of set to operate on"); + +static const int kInitHeadHeight = 10; +static const int kMaxValue = 0x1000000; + +namespace { + +using namespace folly; + +typedef int ValueType; +typedef ConcurrentSkipList SkipListType; +typedef SkipListType::Accessor SkipListAccessor; +typedef std::set SetType; + +static std::vector gData; +static void initData() { + gData.resize(kMaxValue); + for (int i = 0; i < kMaxValue; ++i) { + gData[i] = i; + } + std::random_shuffle(gData.begin(), gData.end()); +} + +// single thread benchmarks +void BM_IterateOverSet(int iters, int size) { + SetType a_set; + + BENCHMARK_SUSPEND { + CHECK_GT(size, 0); + for (int i = 0; i < size; ++i) { + a_set.insert(gData[rand() % kMaxValue]); + } + } + + int64_t sum = 0; + auto iter = a_set.begin(); + for (int i = 0; i < iters; ++i) { + sum += *iter++; + if (iter == a_set.end()) iter = a_set.begin(); + } + BENCHMARK_SUSPEND { + VLOG(20) << "sum = " << sum; + } +} + +void BM_IterateSkipList(int iters, int size) { + BenchmarkSuspender susp; + CHECK_GT(size, 0); + auto skipList = SkipListType::create(kInitHeadHeight); + for (int i = 0; i < size; ++i) { + skipList.add(rand() % kMaxValue); + } + int64_t sum = 0; + susp.dismiss(); + + auto iter = skipList.begin(); + for (int i = 0; i < iters; ++i) { + sum += *iter++; + if (iter == skipList.end()) iter = skipList.begin(); + } + + BENCHMARK_SUSPEND { + VLOG(20) << "sum = " << sum; + } +} + +void BM_SetMerge(int iters, int size) { + BenchmarkSuspender susp; + SetType a_set; + SetType b_set; + for (int i = 0; i < iters; ++i) { + a_set.insert(rand() % kMaxValue); + } + for (int i = 0; i < size; ++i) { + b_set.insert(rand() % kMaxValue); + } + susp.dismiss(); + + int64_t mergedSum = 0; + FOR_EACH(it, a_set) { + if (b_set.find(*it) != b_set.end()) mergedSum += *it; + } + BENCHMARK_SUSPEND { + VLOG(20) << mergedSum; + } +} + +void BM_CSLMergeLookup(int iters, int size) { + BenchmarkSuspender susp; + auto skipList = SkipListType::create(kInitHeadHeight); + auto skipList2 = SkipListType::create(kInitHeadHeight); + + for (int i = 0; i < iters; ++i) { + skipList.add(rand() % kMaxValue); + } + for (int i = 0; i < size; ++i) { + skipList2.add(rand() % kMaxValue); + } + int64_t mergedSum = 0; + susp.dismiss(); + + SkipListType::Skipper skipper(skipList2); + FOR_EACH(it, skipList) { + if (skipper.to(*it)) mergedSum += *it; + } + + BENCHMARK_SUSPEND { + VLOG(20) << mergedSum; + } +} + +// merge by two skippers +void BM_CSLMergeIntersection(int iters, int size) { + BenchmarkSuspender susp; + auto skipList = SkipListType::create(kInitHeadHeight); + auto skipList2 = SkipListType::create(kInitHeadHeight); + for (int i = 0; i < iters; ++i) { + skipList.add(rand() % kMaxValue); + } + for (int i = 0; i < size; ++i) { + skipList2.add(rand() % kMaxValue); + } + susp.dismiss(); + + SkipListType::Skipper s1(skipList); + SkipListType::Skipper s2(skipList2); + + int64_t mergedSum = 0; + + while (s1.good() && s2.good()) { + int v1 = s1.data(); + int v2 = s2.data(); + if (v1 < v2) { + s1.to(v2); + } else if (v1 > v2) { + s2.to(v1); + } else { + mergedSum += v1; + ++s1; + ++s2; + } + } + + BENCHMARK_SUSPEND { + VLOG(20) << mergedSum; + } +} + +void BM_SetContainsNotFound(int iters, int size) { + BenchmarkSuspender susp; + SetType aset; + CHECK_LT(size, kMaxValue); + for (int i = 0; i < size; ++i) { + aset.insert(2 * i); + } + int64_t sum = 0; + susp.dismiss(); + + for (int i = 0; i < iters; ++i) { + sum += (aset.end() == aset.find(2 * i + 1)); + } + + BENCHMARK_SUSPEND { + VLOG(20) << sum; + 
} +} + +void BM_SetContainsFound(int iters, int size) { + BenchmarkSuspender susp; + SetType aset; + CHECK_LT(size, kMaxValue); + + for (int i = 0; i < size; ++i) { + aset.insert(i); + } + + std::vector values; + for (int i = 0; i < iters; ++i) { + values.push_back(rand() % size); + } + int64_t sum = 0; + susp.dismiss(); + + for (int i = 0; i < iters; ++i) { + sum += (aset.end() == aset.find(values[i])); + } + + BENCHMARK_SUSPEND { + VLOG(20) << sum; + } +} + +void BM_CSLContainsFound(int iters, int size) { + BenchmarkSuspender susp; + auto skipList = SkipListType::create(kInitHeadHeight); + CHECK_LT(size, kMaxValue); + + for (int i = 0; i < size; ++i) { + skipList.add(i); + } + std::vector values; + for (int i = 0; i < iters; ++i) { + values.push_back(rand() % size); + } + int64_t sum = 0; + susp.dismiss(); + + for (int i = 0; i < iters; ++i) { + sum += skipList.contains(values[i]); + } + + BENCHMARK_SUSPEND { + VLOG(20) << sum; + } +} + +void BM_CSLContainsNotFound(int iters, int size) { + BenchmarkSuspender susp; + auto skipList = SkipListType::create(kInitHeadHeight); + CHECK_LT(size, kMaxValue); + + for (int i = 0; i < size; ++i) { + skipList.add(2 * i); + } + int64_t sum = 0; + susp.dismiss(); + + for (int i = 0; i < iters; ++i) { + sum += skipList.contains(2 * i + 1); + } + + BENCHMARK_SUSPEND { + VLOG(20) << sum; + } +} + +void BM_AddSet(int iters, int size) { + BenchmarkSuspender susp; + SetType aset; + for (int i = 0; i < size; ++i) { + aset.insert(gData[i]); + } + susp.dismiss(); + + for (int i = size; i < size + iters; ++i) { + aset.insert(gData[i]); + } +} + +void BM_AddSkipList(int iters, int size) { + BenchmarkSuspender susp; + auto skipList = SkipListType::create(kInitHeadHeight); + for (int i = 0; i < size; ++i) { + skipList.add(gData[i]); + } + susp.dismiss(); + + for (int i = size; i < size + iters; ++i) { + skipList.add(gData[i]); + } +} + +BENCHMARK(Accessor, iters) { + BenchmarkSuspender susp; + auto skiplist = SkipListType::createInstance(kInitHeadHeight); + auto sl = skiplist.get(); + + susp.dismiss(); + for (int i = 0; i < iters; ++i) { + SkipListAccessor accessor(sl); + } +} + +// a benchmark to estimate the +// low bound of doing a ref counting for an Accessor +BENCHMARK(accessorBasicRefcounting, iters) { + BenchmarkSuspender susp; + auto* value = new std::atomic(); + auto* dirty = new std::atomic(); + *value = *dirty = 0; + folly::MicroSpinLock l; + l.init(); + + susp.dismiss(); + for (int i = 0; i < iters; ++i) { + value->fetch_add(1, std::memory_order_relaxed); + if (dirty->load(std::memory_order_acquire) != 0) { + folly::MSLGuard g(l); + } + value->fetch_sub(1, std::memory_order_relaxed); + } + + BENCHMARK_SUSPEND { + delete dirty; + delete value; + } +} + + +// Data For testing contention benchmark +class ConcurrentAccessData { + public: + explicit ConcurrentAccessData(int size) : + skipList_(SkipListType::create(10)), + sets_(FLAGS_num_sets), locks_(FLAGS_num_sets) { + + for (int i = 0; i < size; ++i) { + sets_[0].insert(i); + skipList_.add(i); + } + + for (int i = 0; i < FLAGS_num_sets; ++i) { + locks_[i] = new RWSpinLock(); + if (i > 0) sets_[i] = sets_[0]; + } + + // memory usage + int64_t setMemorySize = sets_[0].size() * sizeof(*sets_[0].begin()._M_node); + int64_t cslMemorySize = 0; + for (auto it = skipList_.begin(); it != skipList_.end(); ++it) { + cslMemorySize += it.nodeSize(); + } + + LOG(INFO) << "size=" << sets_[0].size() + << "; std::set memory size=" << setMemorySize + << "; csl memory size=" << cslMemorySize; + + readValues_.reserve(size); + 
deleteValues_.reserve(size); + writeValues_.reserve(size); + for (int i = size; i < 2 * size; ++i) { + readValues_.push_back(2 * i); + deleteValues_.push_back(2 * i); + + // half new values and half already in the list + writeValues_.push_back((rand() % 2) + 2 * i); + } + std::random_shuffle(readValues_.begin(), readValues_.end()); + std::random_shuffle(deleteValues_.begin(), deleteValues_.end()); + std::random_shuffle(writeValues_.begin(), writeValues_.end()); + } + + ~ConcurrentAccessData() { + FOR_EACH(lock, locks_) delete *lock; + } + + inline bool skipListFind(int idx, ValueType val) { + return skipList_.contains(val); + } + inline void skipListInsert(int idx, ValueType val) { + skipList_.add(val); + } + inline void skipListErase(int idx, ValueType val) { + skipList_.remove(val); + } + + inline bool setFind(int idx, ValueType val) { + RWSpinLock::ReadHolder g(locks_[idx]); + return sets_[idx].find(val) == sets_[idx].end(); + } + inline void setInsert(int idx, ValueType val) { + RWSpinLock::WriteHolder g(locks_[idx]); + sets_[idx].insert(val); + } + inline void setErase(int idx, ValueType val) { + RWSpinLock::WriteHolder g(locks_[idx]); + sets_[idx].erase(val); + } + + void runSkipList(int id, int iters) { + int sum = 0; + for (int i = 0; i < iters; ++i) { + sum += accessSkipList(id, i); + } + VLOG(20) << sum; + } + + void runSet(int id, int iters) { + int sum = 0; + for (int i = 0; i < iters; ++i) { + sum += accessSet(id, i); + } + VLOG(20) << sum; + } + + bool accessSkipList(int64_t id, int t) { + if (t > readValues_.size()) { + t = t % readValues_.size(); + } + uint32_t h = folly::hash::twang_32from64(t * id); + switch (h % 8) { + case 7: // write + if ((h & 0x31) == 0) { // 1/4 chance to delete + skipListErase(0, deleteValues_[t]); + } else { + skipListInsert(0, writeValues_[t]); + } + return 0; + default: + return skipListFind(0, readValues_[t]); + } + } + + bool accessSet(int64_t id, int t) { + if (t > readValues_.size()) { + t = t % readValues_.size(); + } + uint32_t h = folly::hash::twang_32from64(t * id); + int idx = (h % FLAGS_num_sets); + switch (h % 8) { // 1/8 chance to write + case 7: // write + if ((h & 0x31) == 0) { // 1/32 chance to delete + setErase(idx, deleteValues_[t]); + } else { + setInsert(idx, writeValues_[t]); + } + return 0; + default: + return setFind(idx, readValues_[t]); + } + } + + private: + SkipListType::Accessor skipList_; + std::vector sets_; + std::vector locks_; + + std::vector readValues_; + std::vector writeValues_; + std::vector deleteValues_; +}; + +static std::map > g_data; + +static ConcurrentAccessData *mayInitTestData(int size) { + auto it = g_data.find(size); + if (it == g_data.end()) { + auto ptr = boost::shared_ptr( + new ConcurrentAccessData(size)); + g_data[size] = ptr; + return ptr.get(); + } + return it->second.get(); +} + +void BM_ContentionCSL(int iters, int size) { + BenchmarkSuspender susp; + auto data = mayInitTestData(size); + std::vector threads; + susp.dismiss(); + + for (int i = 0; i < FLAGS_num_threads; ++i) { + threads.push_back(boost::thread( + &ConcurrentAccessData::runSkipList, data, i, iters)); + } + FOR_EACH(t, threads) { + (*t).join(); + } +} + +void BM_ContentionStdSet(int iters, int size) { + BenchmarkSuspender susp; + auto data = mayInitTestData(size); + std::vector threads; + susp.dismiss(); + + for (int i = 0; i < FLAGS_num_threads; ++i) { + threads.push_back(boost::thread( + &ConcurrentAccessData::runSet, data, i, iters)); + } + FOR_EACH(t, threads) { + (*t).join(); + } + susp.rehire(); +} + + +// Single-thread 
benchmarking + +BENCHMARK_DRAW_LINE(); + +BENCHMARK_PARAM(BM_IterateOverSet, 1000); +BENCHMARK_PARAM(BM_IterateSkipList, 1000); +BENCHMARK_DRAW_LINE(); +BENCHMARK_PARAM(BM_IterateOverSet, 1000000); +BENCHMARK_PARAM(BM_IterateSkipList, 1000000); +BENCHMARK_DRAW_LINE(); + +// find with keys in the set +BENCHMARK_PARAM(BM_SetContainsFound, 1000); +BENCHMARK_PARAM(BM_CSLContainsFound, 1000); +BENCHMARK_DRAW_LINE(); +BENCHMARK_PARAM(BM_SetContainsFound, 100000); +BENCHMARK_PARAM(BM_CSLContainsFound, 100000); +BENCHMARK_DRAW_LINE(); +BENCHMARK_PARAM(BM_SetContainsFound, 1000000); +BENCHMARK_PARAM(BM_CSLContainsFound, 1000000); +BENCHMARK_DRAW_LINE(); +BENCHMARK_PARAM(BM_SetContainsFound, 10000000); +BENCHMARK_PARAM(BM_CSLContainsFound, 10000000); +BENCHMARK_DRAW_LINE(); + + +// find with keys not in the set +BENCHMARK_PARAM(BM_SetContainsNotFound, 1000); +BENCHMARK_PARAM(BM_CSLContainsNotFound, 1000); +BENCHMARK_DRAW_LINE(); +BENCHMARK_PARAM(BM_SetContainsNotFound, 100000); +BENCHMARK_PARAM(BM_CSLContainsNotFound, 100000); +BENCHMARK_DRAW_LINE(); +BENCHMARK_PARAM(BM_SetContainsNotFound, 1000000); +BENCHMARK_PARAM(BM_CSLContainsNotFound, 1000000); +BENCHMARK_DRAW_LINE(); + + +BENCHMARK_PARAM(BM_AddSet, 1000); +BENCHMARK_PARAM(BM_AddSkipList, 1000); +BENCHMARK_DRAW_LINE(); + +BENCHMARK_PARAM(BM_AddSet, 65536); +BENCHMARK_PARAM(BM_AddSkipList, 65536); +BENCHMARK_DRAW_LINE(); + +BENCHMARK_PARAM(BM_AddSet, 1000000); +BENCHMARK_PARAM(BM_AddSkipList, 1000000); +BENCHMARK_DRAW_LINE(); + +BENCHMARK_PARAM(BM_SetMerge, 1000); +BENCHMARK_PARAM(BM_CSLMergeIntersection, 1000); +BENCHMARK_PARAM(BM_CSLMergeLookup, 1000); +BENCHMARK_DRAW_LINE(); + +BENCHMARK_PARAM(BM_SetMerge, 65536); +BENCHMARK_PARAM(BM_CSLMergeIntersection, 65536); +BENCHMARK_PARAM(BM_CSLMergeLookup, 65536); +BENCHMARK_DRAW_LINE(); + +BENCHMARK_PARAM(BM_SetMerge, 1000000); +BENCHMARK_PARAM(BM_CSLMergeIntersection, 1000000); +BENCHMARK_PARAM(BM_CSLMergeLookup, 1000000); +BENCHMARK_DRAW_LINE(); + + +// multi-thread benchmarking + +BENCHMARK_PARAM(BM_ContentionStdSet, 1024); +BENCHMARK_PARAM(BM_ContentionCSL, 1024); +BENCHMARK_DRAW_LINE(); + +BENCHMARK_PARAM(BM_ContentionStdSet, 65536); +BENCHMARK_PARAM(BM_ContentionCSL, 65536); +BENCHMARK_DRAW_LINE(); + +BENCHMARK_PARAM(BM_ContentionStdSet, 1048576); +BENCHMARK_PARAM(BM_ContentionCSL, 1048576); +BENCHMARK_DRAW_LINE(); + +} + +int main(int argc, char** argv) { + google::InitGoogleLogging(argv[0]); + google::ParseCommandLineFlags(&argc, &argv, true); + + initData(); + runBenchmarks(); + return 0; +} + +#if 0 +/* +Benchmark on Intel(R) Xeon(R) CPU X5650 @2.67GHz + +============================================================================== +1 thread Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ + +37.0% BM_Accessor 100000 1.958 ms 19.58 ns 48.71 M +* BM_AccessorBasicRefcounting 100000 1.429 ms 14.29 ns 66.74 M +------------------------------------------------------------------------------ + + 603% BM_IterateOverSet/1000 100000 1.589 ms 15.89 ns 60.02 M +* BM_IterateSkipList/1000 100000 226 us 2.26 ns 422 M +------------------------------------------------------------------------------ + + 107% BM_IterateOverSet/976.6k 100000 8.324 ms 83.24 ns 11.46 M +* BM_IterateSkipList/976.6k 100000 4.016 ms 40.16 ns 23.75 M +------------------------------------------------------------------------------ +* BM_SetContainsFound/1000 100000 7.082 ms 70.82 ns 13.47 M + +39.9% BM_CSLContainsFound/1000 100000 9.908 ms 99.08 ns 9.625 M 
+------------------------------------------------------------------------------ +* BM_SetContainsFound/97.66k 100000 23.8 ms 238 ns 4.006 M + +5.97% BM_CSLContainsFound/97.66k 100000 25.23 ms 252.3 ns 3.781 M +------------------------------------------------------------------------------ + +33.6% BM_SetContainsFound/976.6k 100000 64.3 ms 643 ns 1.483 M +* BM_CSLContainsFound/976.6k 100000 48.13 ms 481.3 ns 1.981 M +------------------------------------------------------------------------------ + +30.3% BM_SetContainsFound/9.537M 100000 115.1 ms 1.151 us 848.6 k +* BM_CSLContainsFound/9.537M 100000 88.33 ms 883.3 ns 1.08 M +------------------------------------------------------------------------------ +* BM_SetContainsNotFound/1000 100000 2.081 ms 20.81 ns 45.83 M + +76.2% BM_CSLContainsNotFound/1000 100000 3.667 ms 36.67 ns 26.01 M +------------------------------------------------------------------------------ +* BM_SetContainsNotFound/97.66k 100000 6.049 ms 60.49 ns 15.77 M + +32.7% BM_CSLContainsNotFound/97.66k 100000 8.025 ms 80.25 ns 11.88 M +------------------------------------------------------------------------------ +* BM_SetContainsNotFound/976.6k 100000 7.464 ms 74.64 ns 12.78 M + +12.8% BM_CSLContainsNotFound/976.6k 100000 8.417 ms 84.17 ns 11.33 M +------------------------------------------------------------------------------ +* BM_AddSet/1000 100000 29.26 ms 292.6 ns 3.259 M + +70.0% BM_AddSkipList/1000 100000 49.75 ms 497.5 ns 1.917 M +------------------------------------------------------------------------------ +* BM_AddSet/64k 100000 38.73 ms 387.3 ns 2.462 M + +55.7% BM_AddSkipList/64k 100000 60.3 ms 603 ns 1.581 M +------------------------------------------------------------------------------ +* BM_AddSet/976.6k 100000 75.71 ms 757.1 ns 1.26 M + +33.6% BM_AddSkipList/976.6k 100000 101.2 ms 1.012 us 965.3 k +------------------------------------------------------------------------------ + + 716% BM_SetMerge/1000 100000 6.872 ms 68.72 ns 13.88 M +* BM_CSLMergeIntersection/1000 100000 842 us 8.42 ns 113.3 M + + 268% BM_CSLMergeLookup/1000 100000 3.1 ms 31 ns 30.76 M +------------------------------------------------------------------------------ + +36.3% BM_SetMerge/64k 100000 14.03 ms 140.3 ns 6.798 M + +39.4% BM_CSLMergeIntersection/64k 100000 14.35 ms 143.5 ns 6.645 M +* BM_CSLMergeLookup/64k 100000 10.29 ms 102.9 ns 9.266 M +------------------------------------------------------------------------------ + +10.3% BM_SetMerge/976.6k 100000 46.24 ms 462.4 ns 2.062 M + +25.1% BM_CSLMergeIntersection/976.6k 100000 52.47 ms 524.7 ns 1.818 M +* BM_CSLMergeLookup/976.6k 100000 41.94 ms 419.3 ns 2.274 M +------------------------------------------------------------------------------ + + +============================================================================== +Contention benchmark 7/8 find, 3/32 insert, 1/32 erase + + 4 threads Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ + + 269% BM_ContentionStdSet/1k 100000 75.66 ms 756.6 ns 1.26 M +* BM_ContentionCSL/1k 100000 20.47 ms 204.7 ns 4.658 M +------------------------------------------------------------------------------ + + 228% BM_ContentionStdSet/64k 100000 105.6 ms 1.056 us 924.9 k +* BM_ContentionCSL/64k 100000 32.18 ms 321.8 ns 2.963 M +------------------------------------------------------------------------------ + + 224% BM_ContentionStdSet/1M 100000 117.4 ms 1.174 us 832.2 k +* BM_ContentionCSL/1M 100000 36.18 ms 361.8 ns 2.636 M 
+------------------------------------------------------------------------------ + + +12 threads Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ + + 697% BM_ContentionStdSet/1k 100000 455.3 ms 4.553 us 214.5 k +* BM_ContentionCSL/1k 100000 57.12 ms 571.2 ns 1.67 M +------------------------------------------------------------------------------ + +1257% BM_ContentionStdSet/64k 100000 654.9 ms 6.549 us 149.1 k +* BM_ContentionCSL/64k 100000 48.24 ms 482.4 ns 1.977 M +------------------------------------------------------------------------------ + +1262% BM_ContentionStdSet/1M 100000 657.3 ms 6.573 us 148.6 k +* BM_ContentionCSL/1M 100000 48.25 ms 482.5 ns 1.977 M +------------------------------------------------------------------------------ + +*/ +#endif diff --git a/folly/test/ConcurrentSkipListTest.cpp b/folly/test/ConcurrentSkipListTest.cpp new file mode 100644 index 00000000..0091455a --- /dev/null +++ b/folly/test/ConcurrentSkipListTest.cpp @@ -0,0 +1,329 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// @author: Xin Liu + +#include +#include +#include + +#include +#include +#include "folly/ConcurrentSkipList.h" +#include "folly/Foreach.h" +#include "gtest/gtest.h" + +DEFINE_int32(num_threads, 12, "num concurrent threads to test"); + +namespace { + +using namespace folly; +using std::vector; + +typedef int ValueType; +typedef detail::SkipListNode SkipListNodeType; +typedef ConcurrentSkipList SkipListType; +typedef SkipListType::Accessor SkipListAccessor; +typedef vector VectorType; +typedef std::set SetType; + +static const int kHeadHeight = 2; +static const int kMaxValue = 5000; + +static void randomAdding(int size, + SkipListAccessor skipList, + SetType *verifier, + int maxValue = kMaxValue) { + for (int i = 0; i < size; ++i) { + int32_t r = rand() % maxValue; + verifier->insert(r); + skipList.add(r); + } +} + +static void randomRemoval(int size, + SkipListAccessor skipList, + SetType *verifier, + int maxValue=kMaxValue) { + for (int i = 0; i < size; ++i) { + int32_t r = rand() % maxValue; + verifier->insert(r); + skipList.remove(r); + } +} + +static void sumAllValues(SkipListAccessor skipList, int64_t *sum) { + *sum = 0; + FOR_EACH(it, skipList) { + *sum += *it; + } + VLOG(20) << "sum = " << sum; +} + +static void concurrentSkip(const vector *values, + SkipListAccessor skipList) { + int64_t sum = 0; + SkipListAccessor::Skipper skipper(skipList); + FOR_EACH(it, *values) { + if (skipper.to(*it)) sum += *it; + } + VLOG(20) << "sum = " << sum; +} + +bool verifyEqual(SkipListAccessor skipList, + const SetType &verifier) { + EXPECT_EQ(verifier.size(), skipList.size()); + FOR_EACH(it, verifier) { + CHECK(skipList.contains(*it)) << *it; + SkipListType::const_iterator iter = skipList.find(*it); + CHECK(iter != skipList.end()); + EXPECT_EQ(*iter, *it); + } + EXPECT_TRUE(std::equal(verifier.begin(), verifier.end(), skipList.begin())); + return true; +} + 
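+// The concurrent "add" tests below share one verification pattern: each
+// mutating thread records the keys it touched in a private SetType (so no
+// extra synchronization is needed), and after all threads are joined the
+// union of those sets is compared against the skip list with verifyEqual().
+// A minimal sketch of that flow with two inserter threads:
+//
+//   auto sl = SkipListType::create(kHeadHeight);
+//   SetType v1, v2;
+//   boost::thread t1(&randomAdding, 100, sl, &v1, kMaxValue);
+//   boost::thread t2(&randomAdding, 100, sl, &v2, kMaxValue);
+//   t1.join(); t2.join();
+//   v1.insert(v2.begin(), v2.end());
+//   verifyEqual(sl, v1);
+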
+TEST(ConcurrentSkipList, SequentialAccess) { + { + LOG(INFO) << "nodetype size=" << sizeof(SkipListNodeType); + + auto skipList(SkipListType::create(kHeadHeight)); + EXPECT_TRUE(skipList.first() == NULL); + EXPECT_TRUE(skipList.last() == NULL); + + skipList.add(3); + EXPECT_TRUE(skipList.contains(3)); + EXPECT_FALSE(skipList.contains(2)); + EXPECT_EQ(3, *skipList.first()); + EXPECT_EQ(3, *skipList.last()); + + EXPECT_EQ(3, *skipList.find(3)); + EXPECT_FALSE(skipList.find(3) == skipList.end()); + EXPECT_TRUE(skipList.find(2) == skipList.end()); + + { + SkipListAccessor::Skipper skipper(skipList); + skipper.to(3); + CHECK_EQ(3, *skipper); + } + + skipList.add(2); + EXPECT_EQ(2, *skipList.first()); + EXPECT_EQ(3, *skipList.last()); + skipList.add(5); + EXPECT_EQ(5, *skipList.last()); + skipList.add(3); + EXPECT_EQ(5, *skipList.last()); + auto ret = skipList.insert(9); + EXPECT_EQ(9, *ret.first); + EXPECT_TRUE(ret.second); + + ret = skipList.insert(5); + EXPECT_EQ(5, *ret.first); + EXPECT_FALSE(ret.second); + + EXPECT_EQ(2, *skipList.first()); + EXPECT_EQ(9, *skipList.last()); + EXPECT_TRUE(skipList.pop_back()); + EXPECT_EQ(5, *skipList.last()); + EXPECT_TRUE(skipList.pop_back()); + EXPECT_EQ(3, *skipList.last()); + + skipList.add(9); + skipList.add(5); + + CHECK(skipList.contains(2)); + CHECK(skipList.contains(3)); + CHECK(skipList.contains(5)); + CHECK(skipList.contains(9)); + CHECK(!skipList.contains(4)); + + // lower_bound + auto it = skipList.lower_bound(5); + EXPECT_EQ(5, *it); + it = skipList.lower_bound(4); + EXPECT_EQ(5, *it); + it = skipList.lower_bound(9); + EXPECT_EQ(9, *it); + it = skipList.lower_bound(12); + EXPECT_FALSE(it.good()); + + it = skipList.begin(); + EXPECT_EQ(2, *it); + + // skipper test + SkipListAccessor::Skipper skipper(skipList); + skipper.to(3); + EXPECT_EQ(3, skipper.data()); + skipper.to(5); + EXPECT_EQ(5, skipper.data()); + CHECK(!skipper.to(7)); + + skipList.remove(5); + skipList.remove(3); + CHECK(skipper.to(9)); + EXPECT_EQ(9, skipper.data()); + + CHECK(!skipList.contains(3)); + skipList.add(3); + CHECK(skipList.contains(3)); + int pos = 0; + FOR_EACH(it, skipList) { + LOG(INFO) << "pos= " << pos++ << " value= " << *it; + } + } + + { + auto skipList(SkipListType::create(kHeadHeight)); + + SetType verifier; + randomAdding(10000, skipList, &verifier); + verifyEqual(skipList, verifier); + + // test skipper + SkipListAccessor::Skipper skipper(skipList); + int num_skips = 1000; + for (int i = 0; i < num_skips; ++i) { + int n = i * kMaxValue / num_skips; + bool found = skipper.to(n); + EXPECT_EQ(found, (verifier.find(n) != verifier.end())); + } + } + +} + +void testConcurrentAdd(int numThreads) { + auto skipList(SkipListType::create(kHeadHeight)); + + vector threads; + vector verifiers(numThreads); + for (int i = 0; i < numThreads; ++i) { + threads.push_back(boost::thread( + &randomAdding, 100, skipList, &verifiers[i], kMaxValue)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + + SetType all; + FOR_EACH(s, verifiers) { + all.insert(s->begin(), s->end()); + } + verifyEqual(skipList, all); +} + +TEST(ConcurrentSkipList, ConcurrentAdd) { + // test it many times + for (int numThreads = 10; numThreads < 10000; numThreads += 1000) { + testConcurrentAdd(numThreads); + } +} + +void testConcurrentRemoval(int numThreads, int maxValue) { + auto skipList = SkipListType::create(kHeadHeight); + for (int i = 0; i < maxValue; ++i) { + skipList.add(i); + } + + vector threads; + vector verifiers(numThreads); + for (int i = 0; i < numThreads; ++i) { + 
threads.push_back(boost::thread( + &randomRemoval, 100, skipList, &verifiers[i], maxValue)); + } + FOR_EACH(t, threads) { + (*t).join(); + } + + SetType all; + FOR_EACH(s, verifiers) { + all.insert(s->begin(), s->end()); + } + + CHECK_EQ(maxValue, all.size() + skipList.size()); + for (int i = 0; i < maxValue; ++i) { + if (all.find(i) != all.end()) { + CHECK(!skipList.contains(i)) << i; + } else { + CHECK(skipList.contains(i)) << i; + } + } +} + +TEST(ConcurrentSkipList, ConcurrentRemove) { + for (int numThreads = 10; numThreads < 1000; numThreads += 100) { + testConcurrentRemoval(numThreads, 100 * numThreads); + } +} + +static void testConcurrentAccess( + int numInsertions, int numDeletions, int maxValue) { + auto skipList = SkipListType::create(kHeadHeight); + + vector verifiers(FLAGS_num_threads); + vector sums(FLAGS_num_threads); + vector > skipValues(FLAGS_num_threads); + + for (int i = 0; i < FLAGS_num_threads; ++i) { + for (int j = 0; j < numInsertions; ++j) { + skipValues[i].push_back(rand() % (maxValue + 1)); + } + std::sort(skipValues[i].begin(), skipValues[i].end()); + } + + vector threads; + for (int i = 0; i < FLAGS_num_threads; ++i) { + switch (i % 8) { + case 0: + case 1: + threads.push_back(boost::thread( + randomAdding, numInsertions, skipList, &verifiers[i], maxValue)); + break; + case 2: + threads.push_back(boost::thread( + randomRemoval, numDeletions, skipList, &verifiers[i], maxValue)); + break; + case 3: + threads.push_back(boost::thread( + concurrentSkip, &skipValues[i], skipList)); + break; + default: + threads.push_back(boost::thread(sumAllValues, skipList, &sums[i])); + break; + } + } + + FOR_EACH(t, threads) { + (*t).join(); + } + // just run through it, no need to verify the correctness. +} + +TEST(ConcurrentSkipList, ConcurrentAccess) { + testConcurrentAccess(10000, 100, kMaxValue); + testConcurrentAccess(100000, 10000, kMaxValue * 10); + testConcurrentAccess(1000000, 100000, kMaxValue); +} + +} // namespace + +int main(int argc, char* argv[]) { + testing::InitGoogleTest(&argc, argv); + google::InitGoogleLogging(argv[0]); + google::ParseCommandLineFlags(&argc, &argv, true); + + return RUN_ALL_TESTS(); +} diff --git a/folly/test/ConvTest.cpp b/folly/test/ConvTest.cpp new file mode 100644 index 00000000..a67eef85 --- /dev/null +++ b/folly/test/ConvTest.cpp @@ -0,0 +1,692 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/Benchmark.h" +#include "folly/Conv.h" +#include "folly/Foreach.h" +#include +#include +#include +#include + +using namespace std; +using namespace folly; + +static int8_t s8; +static uint8_t u8; +static int16_t s16; +static uint16_t u16; +static int32_t s32; +static uint32_t u32; +static int64_t s64; +static uint64_t u64; + +TEST(Conv, Integral2Integral) { + // Same size, different signs + s64 = numeric_limits::max(); + EXPECT_EQ(to(s64), s64); + + s64 = numeric_limits::max(); + EXPECT_EQ(to(s64), s64); +} + +TEST(Conv, Floating2Floating) { + float f1 = 1e3; + double d1 = to(f1); + EXPECT_EQ(f1, d1); + + double d2 = 23.0; + auto f2 = to(d2); + EXPECT_EQ(double(f2), d2); + + double invalidFloat = std::numeric_limits::max(); + EXPECT_ANY_THROW(to(invalidFloat)); + invalidFloat = -std::numeric_limits::max(); + EXPECT_ANY_THROW(to(invalidFloat)); + + try { + auto shouldWork = to(std::numeric_limits::min()); + // The value of `shouldWork' is an implementation defined choice + // between the following two alternatives. + EXPECT_TRUE(shouldWork == std::numeric_limits::min() || + shouldWork == 0.f); + } catch (...) { + EXPECT_TRUE(false); + } +} + +template +void testIntegral2String() { +} + +template +void testIntegral2String() { + typedef typename make_unsigned::type Uint; + typedef typename make_signed::type Sint; + + Uint value = 123; + EXPECT_EQ(to(value), "123"); + Sint svalue = 123; + EXPECT_EQ(to(svalue), "123"); + svalue = -123; + EXPECT_EQ(to(svalue), "-123"); + + value = numeric_limits::min(); + EXPECT_EQ(to(to(value)), value); + value = numeric_limits::max(); + EXPECT_EQ(to(to(value)), value); + + svalue = numeric_limits::min(); + EXPECT_EQ(to(to(svalue)), svalue); + value = numeric_limits::max(); + EXPECT_EQ(to(to(svalue)), svalue); + + testIntegral2String(); +} + +TEST(Conv, Integral2String) { + testIntegral2String(); + testIntegral2String(); +} + +template +void testString2Integral() { +} + +template +void testString2Integral() { + typedef typename make_unsigned::type Uint; + typedef typename make_signed::type Sint; + + // Unsigned numbers small enough to fit in a signed type + static const String strings[] = { + "0", + "00", + "2 ", + " 84", + " \n 123 \t\n", + " 127", + "0000000000000000000000000042" + }; + static const Uint values[] = { + 0, + 0, + 2, + 84, + 123, + 127, + 42 + }; + FOR_EACH_RANGE (i, 0, sizeof(strings) / sizeof(*strings)) { + EXPECT_EQ(to(strings[i]), values[i]); + EXPECT_EQ(to(strings[i]), values[i]); + } + + // Unsigned numbers that won't fit in the signed variation + static const String uStrings[] = { + " 128", + "213", + "255" + }; + static const Uint uValues[] = { + 128, + 213, + 255 + }; + FOR_EACH_RANGE (i, 0, sizeof(uStrings)/sizeof(*uStrings)) { + EXPECT_EQ(to(uStrings[i]), uValues[i]); + if (sizeof(Int) == 1) { + EXPECT_THROW(to(uStrings[i]), std::range_error); + } + } + + if (sizeof(Int) >= 4) { + static const String strings2[] = { + "256", + "6324 ", + "63245675 ", + "2147483647" + }; + static const Uint values2[] = { + (Uint)256, + (Uint)6324, + (Uint)63245675, + (Uint)2147483647 + }; + FOR_EACH_RANGE (i, 0, sizeof(strings2)/sizeof(*strings2)) { + EXPECT_EQ(to(strings2[i]), values2[i]); + EXPECT_EQ(to(strings2[i]), values2[i]); + } + + static const String uStrings2[] = { + "2147483648", + "3147483648", + "4147483648", + "4000000000", + }; + static const Uint uValues2[] = { + (Uint)2147483648U, + (Uint)3147483648U, + (Uint)4147483648U, + (Uint)4000000000U, + }; + FOR_EACH_RANGE (i, 0, sizeof(uStrings2)/sizeof(uStrings2)) { + 
EXPECT_EQ(to(uStrings2[i]), uValues2[i]); + if (sizeof(Int) == 4) { + EXPECT_THROW(to(uStrings2[i]), std::range_error); + } + } + } + + if (sizeof(Int) >= 8) { + static_assert(sizeof(Int) <= 8, "Now that would be interesting"); + static const String strings3[] = { + "2147483648", + "5000000001", + "25687346509278435", + "100000000000000000", + "9223372036854775807", + }; + static const Uint values3[] = { + (Uint)2147483648ULL, + (Uint)5000000001ULL, + (Uint)25687346509278435ULL, + (Uint)100000000000000000ULL, + (Uint)9223372036854775807ULL, + }; + FOR_EACH_RANGE (i, 0, sizeof(strings3)/sizeof(*strings3)) { + EXPECT_EQ(to(strings3[i]), values3[i]); + EXPECT_EQ(to(strings3[i]), values3[i]); + } + + static const String uStrings3[] = { + "9223372036854775808", + "9987435987394857987", + "17873648761234698740", + "18446744073709551615", + }; + static const Uint uValues3[] = { + (Uint)9223372036854775808ULL, + (Uint)9987435987394857987ULL, + (Uint)17873648761234698740ULL, + (Uint)18446744073709551615ULL, + }; + FOR_EACH_RANGE (i, 0, sizeof(uStrings3)/sizeof(*uStrings3)) { + EXPECT_EQ(to(uStrings3[i]), uValues3[i]); + if (sizeof(Int) == 8) { + EXPECT_THROW(to(uStrings3[i]), std::range_error); + } + } + } + + // Minimum possible negative values, and negative sign overflow + static const String strings4[] = { + "-128", + "-32768", + "-2147483648", + "-9223372036854775808", + }; + static const String strings5[] = { + "-129", + "-32769", + "-2147483649", + "-9223372036854775809", + }; + static const Sint values4[] = { + (Sint)-128LL, + (Sint)-32768LL, + (Sint)-2147483648LL, + (Sint)(-9223372036854775807LL - 1), + }; + FOR_EACH_RANGE (i, 0, sizeof(strings4)/sizeof(*strings4)) { + if (sizeof(Int) > std::pow(2, i)) { + EXPECT_EQ(values4[i], to(strings4[i])); + EXPECT_EQ(values4[i] - 1, to(strings5[i])); + } else if (sizeof(Int) == std::pow(2, i)) { + EXPECT_EQ(values4[i], to(strings4[i])); + EXPECT_THROW(to(strings5[i]), std::range_error); + } else { + EXPECT_THROW(to(strings4[i]), std::range_error); + EXPECT_THROW(to(strings5[i]), std::range_error); + } + } + + // Bogus string values + static const String bogusStrings[] = { + "", + "0x1234", + "123L", + "123a", + "x 123 ", + "234 y", + "- 42", // whitespace is not allowed between the sign and the value + " + 13 ", + "12345678901234567890123456789", + }; + for (const auto& str : bogusStrings) { + EXPECT_THROW(to(str), std::range_error); + EXPECT_THROW(to(str), std::range_error); + } + + // A leading '+' character is only allowed when converting to signed types. + String posSign("+42"); + EXPECT_EQ(42, to(posSign)); + EXPECT_THROW(to(posSign), std::range_error); + + testString2Integral(); +} + +TEST(Conv, String2Integral) { + testString2Integral(); + testString2Integral(); + testString2Integral(); + + // Testing the behavior of the StringPiece* API + // StringPiece* normally parses as much valid data as it can, + // and advances the StringPiece to the end of the valid data. + char buf1[] = "100foo"; + StringPiece sp1(buf1); + EXPECT_EQ(100, to(&sp1)); + EXPECT_EQ(buf1 + 3, sp1.begin()); + // However, if the next character would cause an overflow it throws a + // range_error rather than consuming only as much as it can without + // overflowing. 
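+  // For example, "1002" cannot be parsed into an int8_t: the prefix "100"
+  // fits, but consuming the trailing '2' would overflow (int8_t max is 127),
+  // so the call below throws and leaves the StringPiece where it started.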
+ char buf2[] = "1002"; + StringPiece sp2(buf2); + EXPECT_THROW(to(&sp2), std::range_error); + EXPECT_EQ(buf2, sp2.begin()); +} + +TEST(Conv, StringPiece2Integral) { + string s = " +123 hello world "; + StringPiece sp = s; + EXPECT_EQ(to(&sp), 123); + EXPECT_EQ(sp, " hello world "); +} + +TEST(Conv, StringPieceAppend) { + string s = "foobar"; + { + StringPiece sp(s, 0, 3); + string result = to(s, sp); + EXPECT_EQ(result, "foobarfoo"); + } + { + StringPiece sp1(s, 0, 3); + StringPiece sp2(s, 3, 3); + string result = to(sp1, sp2); + EXPECT_EQ(result, s); + } +} + +TEST(Conv, BadStringToIntegral) { + // Note that leading spaces (e.g. " 1") are valid. + vector v = { "a", "", " ", "\n", " a0", "abcdef", "1Z", "!#" }; + for (auto& s: v) { + EXPECT_THROW(to(s), std::range_error) << "s=" << s; + } +} + +template +void testVariadicTo() { + String s; + toAppend(&s); + toAppend("Lorem ipsum ", 1234, String(" dolor amet "), 567.89, '!', &s); + EXPECT_EQ(s, "Lorem ipsum 1234 dolor amet 567.89!"); + + s = to(); + EXPECT_TRUE(s.empty()); + + s = to("Lorem ipsum ", nullptr, 1234, " dolor amet ", 567.89, '.'); + EXPECT_EQ(s, "Lorem ipsum 1234 dolor amet 567.89."); +} + +TEST(Conv, NullString) { + string s1 = to((char *) NULL); + EXPECT_TRUE(s1.empty()); + fbstring s2 = to((char *) NULL); + EXPECT_TRUE(s2.empty()); +} + +TEST(Conv, VariadicTo) { + testVariadicTo(); + testVariadicTo(); +} + +template +void testDoubleToString() { + EXPECT_EQ(to(0.0), "0"); + EXPECT_EQ(to(0.5), "0.5"); + EXPECT_EQ(to(10.25), "10.25"); + EXPECT_EQ(to(1.123e10), "11230000000"); +} + +TEST(Conv, DoubleToString) { + testDoubleToString(); + testDoubleToString(); +} + +TEST(Conv, FBStringToString) { + fbstring foo("foo"); + string ret = to(foo); + EXPECT_EQ(ret, "foo"); + string ret2 = to(foo, 2); + EXPECT_EQ(ret2, "foo2"); +} + +TEST(Conv, StringPieceToDouble) { + string s = "2134123.125 zorro"; + StringPiece pc(s); + EXPECT_EQ(to(&pc), 2134123.125); + EXPECT_EQ(pc, " zorro"); + + EXPECT_THROW(to(StringPiece(s)), std::range_error); + EXPECT_EQ(to(StringPiece(s.data(), pc.data())), 2134123.125); + +// Test NaN conversion + try { + to("not a number"); + EXPECT_TRUE(false); + } catch (const std::range_error &) { + } + + EXPECT_TRUE(std::isnan(to("NaN"))); + EXPECT_EQ(to("inf"), numeric_limits::infinity()); + EXPECT_EQ(to("infinity"), numeric_limits::infinity()); + EXPECT_THROW(to("infinitX"), std::range_error); + EXPECT_EQ(to("-inf"), -numeric_limits::infinity()); + EXPECT_EQ(to("-infinity"), -numeric_limits::infinity()); + EXPECT_THROW(to("-infinitX"), std::range_error); +} + +TEST(Conv, EmptyStringToInt) { + string s = ""; + StringPiece pc(s); + + try { + to(pc); + EXPECT_TRUE(false); + } catch (const std::range_error &) { + } +} + +TEST(Conv, CorruptedStringToInt) { + string s = "-1"; + StringPiece pc(s.data(), s.data() + 1); // Only "-" + + try { + to(&pc); + EXPECT_TRUE(false); + } catch (const std::range_error &) { + } +} + +TEST(Conv, EmptyStringToDouble) { + string s = ""; + StringPiece pc(s); + + try { + to(pc); + EXPECT_TRUE(false); + } catch (const std::range_error &) { + } +} + +TEST(Conv, IntToDouble) { + auto d = to(42); + EXPECT_EQ(d, 42); + /* This seems not work in ubuntu11.10, gcc 4.6.1 + try { + auto f = to(957837589847); + EXPECT_TRUE(false); + } catch (std::range_error& e) { + //LOG(INFO) << e.what(); + } + */ +} + +TEST(Conv, DoubleToInt) { + auto i = to(42.0); + EXPECT_EQ(i, 42); + try { + auto i = to(42.1); + EXPECT_TRUE(false); + } catch (std::range_error& e) { + //LOG(INFO) << e.what(); + } +} + +TEST(Conv, 
EnumToInt) {
+  enum A { x = 42, y = 420, z = 65 };
+  auto i = to<int>(x);
+  EXPECT_EQ(i, 42);
+  auto j = to<char>(x);
+  EXPECT_EQ(j, 42);
+  try {
+    auto i = to<char>(y);
+    LOG(ERROR) << static_cast<int>(i);
+    EXPECT_TRUE(false);
+  } catch (std::range_error& e) {
+    //LOG(INFO) << e.what();
+  }
+}
+
+TEST(Conv, EnumToString) {
+  // task 813959
+  enum A { x = 4, y = 420, z = 65 };
+  EXPECT_EQ("foo.4", to<string>("foo.", x));
+  EXPECT_EQ("foo.420", to<string>("foo.", y));
+  EXPECT_EQ("foo.65", to<string>("foo.", z));
+}
+
+TEST(Conv, IntToEnum) {
+  enum A { x = 42, y = 420 };
+  auto i = to<A>(42);
+  EXPECT_EQ(i, A::x);
+  auto j = to<A>(100);
+  EXPECT_EQ(j, 100);
+  try {
+    auto i = to<A>(5000000000L);
+    EXPECT_TRUE(false);
+  } catch (std::range_error& e) {
+    //LOG(INFO) << e.what();
+  }
+}
+
+template <class Src>
+void testStr2Bool() {
+  EXPECT_FALSE(to<bool>(Src("0")));
+  EXPECT_FALSE(to<bool>(Src(" 000 ")));
+
+  EXPECT_FALSE(to<bool>(Src("n")));
+  EXPECT_FALSE(to<bool>(Src("no")));
+  EXPECT_FALSE(to<bool>(Src("false")));
+  EXPECT_FALSE(to<bool>(Src("False")));
+  EXPECT_FALSE(to<bool>(Src(" fAlSe" )));
+  EXPECT_FALSE(to<bool>(Src("F")));
+  EXPECT_FALSE(to<bool>(Src("off")));
+
+  EXPECT_TRUE(to<bool>(Src("1")));
+  EXPECT_TRUE(to<bool>(Src(" 001 ")));
+  EXPECT_TRUE(to<bool>(Src("y")));
+  EXPECT_TRUE(to<bool>(Src("yes")));
+  EXPECT_TRUE(to<bool>(Src("\nyEs\t")));
+  EXPECT_TRUE(to<bool>(Src("true")));
+  EXPECT_TRUE(to<bool>(Src("True")));
+  EXPECT_TRUE(to<bool>(Src("T")));
+  EXPECT_TRUE(to<bool>(Src("on")));
+
+  EXPECT_THROW(to<bool>(Src("")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("2")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("11")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("19")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("o")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("fal")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("tru")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("ye")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("yes foo")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("bar no")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("one")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("true_")), std::range_error);
+  EXPECT_THROW(to<bool>(Src("bogus_token_that_is_too_long")),
+               std::range_error);
+}
+
+TEST(Conv, StringToBool) {
+  // testStr2Bool();
+  testStr2Bool<string>();
+
+  // Test with strings that are not NUL terminated.
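+  // Because the conversion only looks at the [begin, end) range of a
+  // StringPiece, a prefix of a longer buffer can be tested without copying:
+  // below, the first byte of "01234" parses as false and the second as true.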
+ const char buf[] = "01234"; + EXPECT_FALSE(to(StringPiece(buf, buf + 1))); // "0" + EXPECT_TRUE(to(StringPiece(buf + 1, buf + 2))); // "1" + const char buf2[] = "one two three"; + EXPECT_TRUE(to(StringPiece(buf2, buf2 + 2))); // "on" + const char buf3[] = "false"; + EXPECT_THROW(to(StringPiece(buf3, buf3 + 3)), // "fal" + std::range_error); + + // Test the StringPiece* API + const char buf4[] = "001foo"; + StringPiece sp4(buf4); + EXPECT_TRUE(to(&sp4)); + EXPECT_EQ(buf4 + 3, sp4.begin()); + const char buf5[] = "0012"; + StringPiece sp5(buf5); + EXPECT_THROW(to(&sp5), std::range_error); + EXPECT_EQ(buf5, sp5.begin()); +} + +//////////////////////////////////////////////////////////////////////////////// +// Benchmarks for ASCII to int conversion +//////////////////////////////////////////////////////////////////////////////// +// @author: Rajat Goel (rajat) + +static int64_t handwrittenAtoi(const char* start, const char* end) { + + bool positive = true; + int64_t retVal = 0; + + if (start == end) { + throw std::runtime_error("empty string"); + } + + while (start < end && isspace(*start)) { + ++start; + } + + switch (*start) { + case '-': + positive = false; + case '+': + ++start; + default:; + } + + while (start < end && *start >= '0' && *start <= '9') { + auto const newRetVal = retVal * 10 + (*start++ - '0'); + if (newRetVal < retVal) { + throw std::runtime_error("overflow"); + } + retVal = newRetVal; + } + + if (start != end) { + throw std::runtime_error("extra chars at the end"); + } + + return positive ? retVal : -retVal; +} + +static StringPiece pc1 = "1234567890123456789"; + +void handwrittenAtoiMeasure(uint n, uint digits) { + auto p = pc1.subpiece(pc1.size() - digits, digits); + FOR_EACH_RANGE (i, 0, n) { + doNotOptimizeAway(handwrittenAtoi(p.begin(), p.end())); + } +} + +void follyAtoiMeasure(uint n, uint digits) { + auto p = pc1.subpiece(pc1.size() - digits, digits); + FOR_EACH_RANGE (i, 0, n) { + doNotOptimizeAway(folly::to(p.begin(), p.end())); + } +} + +void clibAtoiMeasure(uint n, uint digits) { + auto p = pc1.subpiece(pc1.size() - digits, digits); + assert(*p.end() == 0); + static_assert(sizeof(long) == 8, "64-bit long assumed"); + FOR_EACH_RANGE (i, 0, n) { + doNotOptimizeAway(atol(p.begin())); + } +} + +void clibStrtoulMeasure(uint n, uint digits) { + auto p = pc1.subpiece(pc1.size() - digits, digits); + assert(*p.end() == 0); + char * endptr; + FOR_EACH_RANGE (i, 0, n) { + doNotOptimizeAway(strtoul(p.begin(), &endptr, 10)); + } +} + +void lexicalCastMeasure(uint n, uint digits) { + auto p = pc1.subpiece(pc1.size() - digits, digits); + assert(*p.end() == 0); + FOR_EACH_RANGE (i, 0, n) { + doNotOptimizeAway(boost::lexical_cast(p.begin())); + } +} + +#define DEFINE_BENCHMARK_GROUP(n) \ + BENCHMARK_PARAM(clibAtoiMeasure, n); \ + BENCHMARK_RELATIVE_PARAM(lexicalCastMeasure, n); \ + BENCHMARK_RELATIVE_PARAM(handwrittenAtoiMeasure, n); \ + BENCHMARK_RELATIVE_PARAM(follyAtoiMeasure, n); + +DEFINE_BENCHMARK_GROUP(1); +DEFINE_BENCHMARK_GROUP(2); +DEFINE_BENCHMARK_GROUP(3); +DEFINE_BENCHMARK_GROUP(4); +DEFINE_BENCHMARK_GROUP(5); +DEFINE_BENCHMARK_GROUP(6); +DEFINE_BENCHMARK_GROUP(7); +DEFINE_BENCHMARK_GROUP(8); +DEFINE_BENCHMARK_GROUP(9); +DEFINE_BENCHMARK_GROUP(10); +DEFINE_BENCHMARK_GROUP(11); +DEFINE_BENCHMARK_GROUP(12); +DEFINE_BENCHMARK_GROUP(13); +DEFINE_BENCHMARK_GROUP(14); +DEFINE_BENCHMARK_GROUP(15); +DEFINE_BENCHMARK_GROUP(16); +DEFINE_BENCHMARK_GROUP(17); +DEFINE_BENCHMARK_GROUP(18); +DEFINE_BENCHMARK_GROUP(19); + +#undef DEFINE_BENCHMARK_GROUP + +int main(int argc, 
+  testing::InitGoogleTest(&argc, argv);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto ret = RUN_ALL_TESTS();
+  if (!ret && FLAGS_benchmark) {
+    folly::runBenchmarks();
+  }
+  return ret;
+}
diff --git a/folly/test/DiscriminatedPtrTest.cpp b/folly/test/DiscriminatedPtrTest.cpp
new file mode 100644
index 00000000..f06115a9
--- /dev/null
+++ b/folly/test/DiscriminatedPtrTest.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/DiscriminatedPtr.h"
+
+#include <gtest/gtest.h>
+
+using namespace folly;
+
+TEST(DiscriminatedPtr, Basic) {
+  struct Foo { };
+  struct Bar { };
+  typedef DiscriminatedPtr<void, int, Foo, Bar> Ptr;
+
+  int a = 10;
+  Ptr p;
+  EXPECT_TRUE(p.empty());
+  EXPECT_FALSE(p.hasType<void>());
+  EXPECT_FALSE(p.hasType<int>());
+  EXPECT_FALSE(p.hasType<Foo>());
+  EXPECT_FALSE(p.hasType<Bar>());
+
+  p.set(&a);
+  EXPECT_FALSE(p.empty());
+  EXPECT_FALSE(p.hasType<void>());
+  EXPECT_TRUE(p.hasType<int>());
+  EXPECT_FALSE(p.hasType<Foo>());
+  EXPECT_FALSE(p.hasType<Bar>());
+
+  EXPECT_EQ(&a, p.get_nothrow<int>());
+  EXPECT_EQ(&a, static_cast<const Ptr&>(p).get_nothrow<int>());
+  EXPECT_EQ(&a, p.get<int>());
+  EXPECT_EQ(&a, static_cast<const Ptr&>(p).get<int>());
+  EXPECT_EQ(static_cast<void*>(NULL), p.get_nothrow<void>());
+  EXPECT_THROW({p.get<void>();}, std::invalid_argument);
+
+  Foo foo;
+  p.set(&foo);
+  EXPECT_FALSE(p.empty());
+  EXPECT_FALSE(p.hasType<void>());
+  EXPECT_FALSE(p.hasType<int>());
+  EXPECT_TRUE(p.hasType<Foo>());
+  EXPECT_FALSE(p.hasType<Bar>());
+
+  EXPECT_EQ(static_cast<int*>(NULL), p.get_nothrow<int>());
+
+  p.clear();
+  EXPECT_TRUE(p.empty());
+  EXPECT_FALSE(p.hasType<void>());
+  EXPECT_FALSE(p.hasType<int>());
+  EXPECT_FALSE(p.hasType<Foo>());
+  EXPECT_FALSE(p.hasType<Bar>());
+}
+
+TEST(DiscriminatedPtr, Apply) {
+  struct Foo { };
+  struct Visitor {
+    std::string operator()(int* ptr) { return "int"; }
+    std::string operator()(const int* ptr) { return "const int"; }
+    std::string operator()(Foo* ptr) { return "Foo"; }
+    std::string operator()(const Foo* ptr) { return "const Foo"; }
+  };
+
+  typedef DiscriminatedPtr<int, Foo> Ptr;
+  Ptr p;
+
+  int a = 0;
+  p.set(&a);
+  EXPECT_EQ("int", p.apply(Visitor()));
+  EXPECT_EQ("const int", static_cast<const Ptr&>(p).apply(Visitor()));
+
+  Foo foo;
+  p.set(&foo);
+  EXPECT_EQ("Foo", p.apply(Visitor()));
+  EXPECT_EQ("const Foo", static_cast<const Ptr&>(p).apply(Visitor()));
+
+  p.clear();
+  EXPECT_THROW({p.apply(Visitor());}, std::invalid_argument);
+}
+
+TEST(DiscriminatedPtr, ApplyVoid) {
+  struct Foo { };
+  struct Visitor {
+    void operator()(int* ptr) { result = "int"; }
+    void operator()(const int* ptr) { result = "const int"; }
+    void operator()(Foo* ptr) { result = "Foo"; }
+    void operator()(const Foo* ptr) { result = "const Foo"; }
+
+    std::string result;
+  };
+
+  typedef DiscriminatedPtr<int, Foo> Ptr;
+  Ptr p;
+  Visitor v;
+
+  int a = 0;
+  p.set(&a);
+  p.apply(v);
+  EXPECT_EQ("int", v.result);
+  static_cast<const Ptr&>(p).apply(v);
+  EXPECT_EQ("const int", v.result);
+
+  Foo foo;
+  p.set(&foo);
+  p.apply(v);
+  EXPECT_EQ("Foo", v.result);
+  static_cast<const Ptr&>(p).apply(v);
+  EXPECT_EQ("const Foo", v.result);
+
+  p.clear();
+  EXPECT_THROW({p.apply(v);},
std::invalid_argument); +} diff --git a/folly/test/DynamicTest.cpp b/folly/test/DynamicTest.cpp new file mode 100644 index 00000000..c1d6f943 --- /dev/null +++ b/folly/test/DynamicTest.cpp @@ -0,0 +1,274 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/dynamic.h" +#include "folly/json.h" +#include +#include +#include +#include "folly/Benchmark.h" + +using folly::dynamic; + +TEST(Dynamic, ObjectBasics) { + dynamic obj = dynamic::object("a", false); + EXPECT_EQ(obj.at("a"), false); + EXPECT_EQ(obj.size(), 1); + obj.insert("a", true); + EXPECT_EQ(obj.size(), 1); + EXPECT_EQ(obj.at("a"), true); + obj.at("a") = nullptr; + EXPECT_EQ(obj.size(), 1); + EXPECT_TRUE(obj.at("a") == nullptr); + + dynamic newObject = dynamic::object; + + newObject["z"] = 12; + EXPECT_EQ(newObject.size(), 1); + newObject["a"] = true; + EXPECT_EQ(newObject.size(), 2); + + EXPECT_EQ(*newObject.keys().begin(), newObject.items().begin()->first); + EXPECT_EQ(*newObject.values().begin(), newObject.items().begin()->second); + std::vector> found; + found.push_back(std::make_pair( + newObject.keys().begin()->asString(), + *newObject.values().begin())); + + EXPECT_EQ(*boost::next(newObject.keys().begin()), + boost::next(newObject.items().begin())->first); + EXPECT_EQ(*boost::next(newObject.values().begin()), + boost::next(newObject.items().begin())->second); + found.push_back(std::make_pair( + boost::next(newObject.keys().begin())->asString(), + *boost::next(newObject.values().begin()))); + + std::sort(found.begin(), found.end()); + + EXPECT_EQ("a", found[0].first); + EXPECT_TRUE(found[0].second.asBool()); + + EXPECT_EQ("z", found[1].first); + EXPECT_EQ(12, found[1].second.asInt()); + + dynamic obj2 = dynamic::object; + EXPECT_TRUE(obj2.isObject()); + + dynamic d3 = nullptr; + EXPECT_TRUE(d3 == nullptr); + d3 = dynamic::object; + EXPECT_TRUE(d3.isObject()); + d3["foo"] = { 1, 2, 3 }; + EXPECT_EQ(d3.count("foo"), 1); + + d3[123] = 321; + EXPECT_EQ(d3.at(123), 321); + + d3["123"] = 42; + EXPECT_EQ(d3.at("123"), 42); + EXPECT_EQ(d3.at(123), 321); + + // We don't allow objects as keys in objects. 
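+  // (Non-object dynamics are fine as keys, as d3[123] and d3["123"] show
+  // above; a sketch of the assumed behavior with mixed key types:
+  //   dynamic m = dynamic::object(12, "int key")(true, "bool key");
+  //   EXPECT_EQ(m.at(12), "int key");
+  // Using d3, which is itself an object, as a key must throw instead.)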
+ EXPECT_ANY_THROW(newObject[d3] = 12); +} + +TEST(Dynamic, ObjectErase) { + dynamic obj = dynamic::object("key1", "val") + ("key2", "val2"); + EXPECT_EQ(obj.count("key1"), 1); + EXPECT_EQ(obj.count("key2"), 1); + EXPECT_EQ(obj.erase("key1"), 1); + EXPECT_EQ(obj.count("key1"), 0); + EXPECT_EQ(obj.count("key2"), 1); + EXPECT_EQ(obj.erase("key1"), 0); + obj["key1"] = 12; + EXPECT_EQ(obj.count("key1"), 1); + EXPECT_EQ(obj.count("key2"), 1); + auto it = obj.find("key2"); + obj.erase(it); + EXPECT_EQ(obj.count("key1"), 1); + EXPECT_EQ(obj.count("key2"), 0); + + obj["asd"] = 42.0; + obj["foo"] = 42.0; + EXPECT_EQ(obj.size(), 3); + auto ret = obj.erase(boost::next(obj.items().begin()), obj.items().end()); + EXPECT_TRUE(ret == obj.items().end()); + EXPECT_EQ(obj.size(), 1); + obj.erase(obj.items().begin()); + EXPECT_TRUE(obj.empty()); +} + +TEST(Dynamic, ArrayErase) { + dynamic arr = { 1, 2, 3, 4, 5, 6 }; + + EXPECT_THROW(arr.erase(1), std::exception); + EXPECT_EQ(arr.size(), 6); + EXPECT_EQ(arr[0], 1); + arr.erase(arr.begin()); + EXPECT_EQ(arr.size(), 5); + + arr.erase(boost::next(arr.begin()), boost::prior(arr.end())); + EXPECT_EQ(arr.size(), 2); + EXPECT_EQ(arr[0], 2); + EXPECT_EQ(arr[1], 6); +} + +TEST(Dynamic, StringBasics) { + dynamic str = "hello world"; + EXPECT_EQ(11, str.size()); + EXPECT_FALSE(str.empty()); + str = ""; + EXPECT_TRUE(str.empty()); +} + +TEST(Dynamic, ArrayBasics) { + dynamic array = { 1, 2, 3 }; + EXPECT_EQ(array.size(), 3); + EXPECT_EQ(array.at(0), 1); + EXPECT_EQ(array.at(1), 2); + EXPECT_EQ(array.at(2), 3); + + EXPECT_ANY_THROW(array.at(3)); + + array.push_back("foo"); + EXPECT_EQ(array.size(), 4); + + array.resize(12, "something"); + EXPECT_EQ(array.size(), 12); + EXPECT_EQ(array[11], "something"); +} + +TEST(Dynamic, DeepCopy) { + dynamic val = { "foo", "bar", { "foo1", "bar1" } }; + EXPECT_EQ(val.at(2).at(0), "foo1"); + EXPECT_EQ(val.at(2).at(1), "bar1"); + dynamic val2 = val; + EXPECT_EQ(val2.at(2).at(0), "foo1"); + EXPECT_EQ(val2.at(2).at(1), "bar1"); + EXPECT_EQ(val.at(2).at(0), "foo1"); + EXPECT_EQ(val.at(2).at(1), "bar1"); + val2.at(2).at(0) = "foo3"; + val2.at(2).at(1) = "bar3"; + EXPECT_EQ(val.at(2).at(0), "foo1"); + EXPECT_EQ(val.at(2).at(1), "bar1"); + EXPECT_EQ(val2.at(2).at(0), "foo3"); + EXPECT_EQ(val2.at(2).at(1), "bar3"); + + dynamic obj = dynamic::object("a", "b") + ("c", {"d", "e", "f"}) + ; + EXPECT_EQ(obj.at("a"), "b"); + dynamic obj2 = obj; + obj2.at("a") = {1, 2, 3}; + EXPECT_EQ(obj.at("a"), "b"); + dynamic expected = {1, 2, 3}; + EXPECT_EQ(obj2.at("a"), expected); +} + +TEST(Dynamic, Operator) { + bool caught = false; + try { + dynamic d1 = dynamic::object; + dynamic d2 = dynamic::object; + auto foo = d1 < d2; + } catch (std::exception const& e) { + caught = true; + } + EXPECT_TRUE(caught); + + dynamic foo = "asd"; + dynamic bar = "bar"; + dynamic sum = foo + bar; + EXPECT_EQ(sum, "asdbar"); + + dynamic some = 12; + dynamic nums = 4; + dynamic math = some / nums; + EXPECT_EQ(math, 3); +} + +TEST(Dynamic, Conversions) { + dynamic str = "12.0"; + EXPECT_EQ(str.asDouble(), 12.0); + EXPECT_ANY_THROW(str.asInt()); + EXPECT_ANY_THROW(str.asBool()); + + str = "12"; + EXPECT_EQ(str.asInt(), 12); + EXPECT_EQ(str.asDouble(), 12.0); + str = "0"; + EXPECT_EQ(str.asBool(), false); + EXPECT_EQ(str.asInt(), 0); + EXPECT_EQ(str.asDouble(), 0); + EXPECT_EQ(str.asString(), "0"); +} + +TEST(Dynamic, FormattedIO) { + std::ostringstream out; + dynamic doubl = 123.33; + dynamic dint = 12; + out << "0x" << std::hex << ++dint << ' ' << std::setprecision(1) + << doubl 
<< '\n'; + EXPECT_EQ(out.str(), "0xd 1e+02\n"); + + out.str(""); + dynamic arrr = { 1, 2, 3 }; + out << arrr; + EXPECT_EQ(out.str(), "[1,2,3]"); + + out.str(""); + dynamic objy = dynamic::object("a", 12); + out << objy; + EXPECT_EQ(out.str(), R"({"a":12})"); + + out.str(""); + dynamic objy2 = { objy, dynamic::object(12, "str"), + dynamic::object(true, false) }; + out << objy2; + EXPECT_EQ(out.str(), R"([{"a":12},{12:"str"},{true:false}])"); +} + +TEST(Dynamic, GetSetDefaultTest) { + dynamic d1 = dynamic::object("foo", "bar"); + EXPECT_EQ(d1.getDefault("foo", "baz"), "bar"); + EXPECT_EQ(d1.getDefault("quux", "baz"), "baz"); + + dynamic d2 = dynamic::object("foo", "bar"); + EXPECT_EQ(d2.setDefault("foo", "quux"), "bar"); + d2.setDefault("bar", dynamic({})).push_back(42); + EXPECT_EQ(d2["bar"][0], 42); + + dynamic d3 = dynamic::object, empty = dynamic::object; + EXPECT_EQ(d3.getDefault("foo"), empty); + d3.setDefault("foo")["bar"] = "baz"; + EXPECT_EQ(d3["foo"]["bar"], "baz"); + + // we do not allow getDefault/setDefault on arrays + dynamic d4 = dynamic({}); + EXPECT_ANY_THROW(d4.getDefault("foo", "bar")); + EXPECT_ANY_THROW(d4.setDefault("foo", "bar")); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_benchmark) { + folly::runBenchmarks(); + } + return RUN_ALL_TESTS(); +} + diff --git a/folly/test/EndianTest.cpp b/folly/test/EndianTest.cpp new file mode 100644 index 00000000..b2139609 --- /dev/null +++ b/folly/test/EndianTest.cpp @@ -0,0 +1,65 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Bits.h" + +#include + +using namespace folly; + +TEST(Endian, Basic) { + uint8_t v8 = 0x12; + uint8_t v8s = v8; + uint16_t v16 = 0x1234; + uint16_t v16s = bswap_16(v16); + uint32_t v32 = 0x12345678; + uint32_t v32s = bswap_32(v32); + uint64_t v64 = 0x123456789abcdef0ULL; + uint64_t v64s = bswap_64(v64); + +#if __BYTE_ORDER == __LITTLE_ENDIAN + +#define GEN1(sz) \ + EXPECT_EQ(v##sz, Endian::little(v##sz)); \ + EXPECT_EQ(v##sz, Endian::little##sz(v##sz)); \ + EXPECT_EQ(v##sz##s, Endian::big(v##sz)); \ + EXPECT_EQ(v##sz##s, Endian::big##sz(v##sz)); + +#elif __BYTE_ORDER == __BIG_ENDIAN + +#define GEN1(sz) \ + EXPECT_EQ(v##sz##s, Endian::little(v##sz)); \ + EXPECT_EQ(v##sz##s, Endian::little##sz(v##sz)); \ + EXPECT_EQ(v##sz, Endian::big(v##sz)); \ + EXPECT_EQ(v##sz, Endian::big##sz(v##sz)); + +#else +# error Your machine uses a weird endianness! +#endif /* __BYTE_ORDER */ + +#define GEN(sz) \ + EXPECT_EQ(v##sz##s, Endian::swap(v##sz)); \ + EXPECT_EQ(v##sz##s, Endian::swap##sz(v##sz)); \ + GEN1(sz); + + GEN(8); + GEN(16) + GEN(32) + GEN(64) + +#undef GEN +#undef GEN1 +} diff --git a/folly/test/EventFDTest.cpp b/folly/test/EventFDTest.cpp new file mode 100644 index 00000000..4f0c8841 --- /dev/null +++ b/folly/test/EventFDTest.cpp @@ -0,0 +1,73 @@ +/* + * Copyright 2012 Facebook, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "folly/eventfd.h" +#include + +using namespace folly; + +TEST(EventFD, Basic) { + int fd = eventfd(10, EFD_NONBLOCK); + CHECK_ERR(fd); + uint64_t val; + ssize_t r; + // Running this twice -- once with the initial value, and once + // after a write() + for (int attempt = 0; attempt < 2; attempt++) { + val = 0; + r = read(fd, &val, sizeof(val)); + CHECK_ERR(r); + EXPECT_EQ(sizeof(val), r); + EXPECT_EQ(10, val); + r = read(fd, &val, sizeof(val)); + EXPECT_EQ(-1, r); + EXPECT_EQ(EAGAIN, errno); + val = 10; + r = write(fd, &val, sizeof(val)); + CHECK_ERR(r); + EXPECT_EQ(sizeof(val), r); + } + close(fd); +} + +TEST(EventFD, Semaphore) { + int fd = eventfd(10, EFD_NONBLOCK | EFD_SEMAPHORE); + CHECK_ERR(fd); + uint64_t val; + ssize_t r; + // Running this twice -- once with the initial value, and once + // after a write() + for (int attempt = 0; attempt < 2; attempt++) { + val = 0; + for (int i = 0; i < 10; i++) { + r = read(fd, &val, sizeof(val)); + CHECK_ERR(r); + EXPECT_EQ(sizeof(val), r); + EXPECT_EQ(1, val); + } + r = read(fd, &val, sizeof(val)); + EXPECT_EQ(-1, r); + EXPECT_EQ(EAGAIN, errno); + val = 10; + r = write(fd, &val, sizeof(val)); + CHECK_ERR(r); + EXPECT_EQ(sizeof(val), r); + } + close(fd); +} + diff --git a/folly/test/FBStringLibstdcxxStdexceptTest.cpp b/folly/test/FBStringLibstdcxxStdexceptTest.cpp new file mode 100644 index 00000000..67d0c728 --- /dev/null +++ b/folly/test/FBStringLibstdcxxStdexceptTest.cpp @@ -0,0 +1,24 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/FBString.h" + +#ifdef _GLIBCXX_STDEXCEPT +#error Cannot include in FBString.h\ + (use std::__throw_* from funcexcept.h instead) +#endif + +int main(){} diff --git a/folly/test/FBStringTest.cpp b/folly/test/FBStringTest.cpp new file mode 100644 index 00000000..3cebf2ba --- /dev/null +++ b/folly/test/FBStringTest.cpp @@ -0,0 +1,1057 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Author: andrei.alexandrescu@fb.com + +#include "folly/FBString.h" + +#include +#include +#include +#include +#include + +#include + +#include "folly/Foreach.h" +#include "folly/Random.h" +#include "folly/Benchmark.h" + +using namespace std; +using namespace folly; + +static const int seed = folly::randomNumberSeed(); +typedef boost::mt19937 RandomT; +static RandomT rng(seed); +static const size_t maxString = 100; +static const bool avoidAliasing = true; + +template +Integral2 random(Integral1 low, Integral2 up) { + boost::uniform_int<> range(low, up); + return range(rng); +} + +template +void randomString(String* toFill, unsigned int maxSize = 1000) { + assert(toFill); + toFill->resize(random(0, maxSize)); + FOR_EACH (i, *toFill) { + *i = random('a', 'z'); + } +} + +template +void Num2String(String& str, Integral n) { + str.resize(30, '\0'); + sprintf(&str[0], "%lu", static_cast(n)); + str.resize(strlen(str.c_str())); +} + +std::list RandomList(unsigned int maxSize) { + std::list lst(random(0u, maxSize)); + std::list::iterator i = lst.begin(); + for (; i != lst.end(); ++i) { + *i = random('a', 'z'); + } + return lst; +} + +// void preventOptimization(void * p) { +// return folly::preventOptimization((int)(long) p); +// } + +//////////////////////////////////////////////////////////////////////////////// +// Tests begin here +//////////////////////////////////////////////////////////////////////////////// + +template void clause_21_3_1_a(String & test) { + test.String::~String(); + new(&test) String(); +} +template void clause_21_3_1_b(String & test) { + // Copy constructor + const size_t pos = random(0, test.size()); + String s(test, pos, random(0, (size_t)(test.size() - pos))); + test = s; +} +template void clause_21_3_1_c(String & test) { + // Constructor from char*, size_t + const size_t + pos = random(0, test.size()), + n = random(0, test.size() - pos); + std::string before(test.data(), test.size()); + String s(test.c_str() + pos, n); + std::string after(test.data(), test.size()); + EXPECT_EQ(before, after); + + // Constructor from char*, char* + String s1(test.begin(), test.end()); + EXPECT_EQ(test, s1); + String s2(test.data(), test.data() + test.size()); + EXPECT_EQ(test, s2); + + // Constructor from iterators + std::list lst; + for (auto c : test) lst.push_back(c); + String s3(lst.begin(), lst.end()); + EXPECT_EQ(test, s3); + + // Constructor from wchar_t iterators + std::list lst1; + for (auto c : test) lst1.push_back(c); + String s4(lst1.begin(), lst1.end()); + EXPECT_EQ(test, s4); + + // Constructor from wchar_t pointers + wchar_t t[20]; + t[0] = 'a'; + t[1] = 'b'; + String s5(t, t + 2);; + EXPECT_EQ("ab", s5); + + test = s; +} +template void clause_21_3_1_d(String & test) { + // Assignment + auto size = random(0, 2000); + String s(size, '\0'); + EXPECT_EQ(s.size(), size); + FOR_EACH_RANGE (i, 0, s.size()) { + s[i] = random('a', 'z'); + } + test = s; +} +template void clause_21_3_1_e(String & test) { + // Assignment from char* + String s(random(0, 1000), '\0'); + size_t i = 0; + for (; i != s.size(); ++i) { + s[i] = random('a', 'z'); + } + test = s.c_str(); +} +template void clause_21_3_1_f(String & test) { + // Aliased assign + const size_t pos = random(0, test.size()); + if (avoidAliasing) { + test = String(test.c_str() + pos); + } else { + test = test.c_str() + pos; + } +} +template void clause_21_3_1_g(String & test) { + // Assignment from char + test = 
random('a', 'z'); +} + +template void clause_21_3_2(String & test) { + // Iterators. The code below should leave test unchanged + EXPECT_EQ(test.size(), test.end() - test.begin()); + EXPECT_EQ(test.size(), test.rend() - test.rbegin()); + + auto s = test.size(); + test.resize(test.end() - test.begin()); + EXPECT_EQ(s, test.size()); + test.resize(test.rend() - test.rbegin()); + EXPECT_EQ(s, test.size()); +} + +template void clause_21_3_3(String & test) { + // exercise capacity, size, max_size + EXPECT_EQ(test.size(), test.length()); + EXPECT_LE(test.size(), test.max_size()); + EXPECT_LE(test.capacity(), test.max_size()); + EXPECT_LE(test.size(), test.capacity()); + // exercise empty + if (test.empty()) test = "empty"; + else test = "not empty"; +} + +template void clause_21_3_4(String & test) { + // exercise element access 21.3.4 + if (!test.empty()) { + auto const i = random(0, test.size() - 1); + EXPECT_EQ(test[i], test.at(i)); + test = test[i]; + } +} + +template void clause_21_3_5_a(String & test) { + // 21.3.5 modifiers (+=) + String test1; + randomString(&test1); + assert(test1.size() == strlen(test1.c_str())); + auto len = test.size(); + test += test1; + EXPECT_EQ(test.size(), test1.size() + len); + FOR_EACH_RANGE (i, 0, test1.size()) { + EXPECT_EQ(test[len + i], test1[i]); + } + // aliasing modifiers + String test2 = test; + auto dt = test2.data(); + auto sz = test.c_str(); + len = test.size(); + EXPECT_EQ(memcmp(sz, dt, len), 0); + String copy(test.data(), test.size()); + EXPECT_EQ(strlen(test.c_str()), len); + test += test; + //test.append(test); + EXPECT_EQ(test.size(), 2 * len); + EXPECT_EQ(strlen(test.c_str()), 2 * len); + FOR_EACH_RANGE (i, 0, len) { + EXPECT_EQ(test[i], copy[i]); + EXPECT_EQ(test[i], test[len + i]); + } + len = test.size(); + EXPECT_EQ(strlen(test.c_str()), len); + // more aliasing + auto const pos = random(0, test.size()); + EXPECT_EQ(strlen(test.c_str() + pos), len - pos); + if (avoidAliasing) { + String addMe(test.c_str() + pos); + EXPECT_EQ(addMe.size(), len - pos); + test += addMe; + } else { + test += test.c_str() + pos; + } + EXPECT_EQ(test.size(), 2 * len - pos); + // single char + len = test.size(); + test += random('a', 'z'); + EXPECT_EQ(test.size(), len + 1); +} + +template void clause_21_3_5_b(String & test) { + // 21.3.5 modifiers (append, push_back) + String s; + + // Test with a small string first + char c = random('a', 'z'); + s.push_back(c); + EXPECT_EQ(s[s.size() - 1], c); + EXPECT_EQ(s.size(), 1); + s.resize(s.size() - 1); + + randomString(&s, maxString); + test.append(s); + randomString(&s, maxString); + test.append(s, random(0, s.size()), random(0, maxString)); + randomString(&s, maxString); + test.append(s.c_str(), random(0, s.size())); + randomString(&s, maxString); + test.append(s.c_str()); + test.append(random(0, maxString), random('a', 'z')); + std::list lst(RandomList(maxString)); + test.append(lst.begin(), lst.end()); + c = random('a', 'z'); + test.push_back(c); + EXPECT_EQ(test[test.size() - 1], c); +} + +template void clause_21_3_5_c(String & test) { + // assign + String s; + randomString(&s); + test.assign(s); +} + +template void clause_21_3_5_d(String & test) { + // assign + String s; + randomString(&s, maxString); + test.assign(s, random(0, s.size()), random(0, maxString)); +} + +template void clause_21_3_5_e(String & test) { + // assign + String s; + randomString(&s, maxString); + test.assign(s.c_str(), random(0, s.size())); +} + +template void clause_21_3_5_f(String & test) { + // assign + String s; + randomString(&s, 
maxString); + test.assign(s.c_str()); +} + +template void clause_21_3_5_g(String & test) { + // assign + String s; + randomString(&s, maxString); + test.assign(random(0, maxString), random('a', 'z')); +} + +template void clause_21_3_5_h(String & test) { + // assign from bidirectional iterator + std::list lst(RandomList(maxString)); + test.assign(lst.begin(), lst.end()); +} + +template void clause_21_3_5_i(String & test) { + // assign from aliased source + test.assign(test); +} + +template void clause_21_3_5_j(String & test) { + // assign from aliased source + test.assign(test, random(0, test.size()), random(0, maxString)); +} + +template void clause_21_3_5_k(String & test) { + // assign from aliased source + test.assign(test.c_str(), random(0, test.size())); +} + +template void clause_21_3_5_l(String & test) { + // assign from aliased source + test.assign(test.c_str()); +} + +template void clause_21_3_5_m(String & test) { + // insert + String s; + randomString(&s, maxString); + test.insert(random(0, test.size()), s); + randomString(&s, maxString); + test.insert(random(0, test.size()), + s, random(0, s.size()), + random(0, maxString)); + randomString(&s, maxString); + test.insert(random(0, test.size()), + s.c_str(), random(0, s.size())); + randomString(&s, maxString); + test.insert(random(0, test.size()), s.c_str()); + test.insert(random(0, test.size()), + random(0, maxString), random('a', 'z')); + test.insert(test.begin() + random(0, test.size()), + random('a', 'z')); + std::list lst(RandomList(maxString)); + test.insert(test.begin() + random(0, test.size()), + lst.begin(), lst.end()); +} + +template void clause_21_3_5_n(String & test) { + // erase + if (!test.empty()) { + test.erase(random(0, test.size()), random(0, maxString)); + } + if (!test.empty()) { + // TODO: is erase(end()) allowed? 
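+  // Per the standard, the single-iterator erase overload requires a
+  // dereferenceable iterator, so erase(end()) would be undefined; the
+  // "- 1" below keeps the chosen position strictly inside the string,
+  // e.g.:
+  //   auto it = test.begin() + random(0, test.size() - 1);
+  //   test.erase(it);  // safe: it != test.end()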
+ test.erase(test.begin() + random(0, test.size() - 1)); + } + if (!test.empty()) { + auto const i = test.begin() + random(0, test.size()); + if (i != test.end()) { + test.erase(i, i + random(0, size_t(test.end() - i))); + } + } +} + +template void clause_21_3_5_o(String & test) { + auto pos = random(0, test.size()); + if (avoidAliasing) { + test.replace(pos, random(0, test.size() - pos), + String(test)); + } else { + test.replace(pos, random(0, test.size() - pos), test); + } + pos = random(0, test.size()); + String s; + randomString(&s, maxString); + test.replace(pos, pos + random(0, test.size() - pos), s); + auto pos1 = random(0, test.size()); + auto pos2 = random(0, test.size()); + if (avoidAliasing) { + test.replace(pos1, pos1 + random(0, test.size() - pos1), + String(test), + pos2, pos2 + random(0, test.size() - pos2)); + } else { + test.replace(pos1, pos1 + random(0, test.size() - pos1), + test, pos2, pos2 + random(0, test.size() - pos2)); + } + pos1 = random(0, test.size()); + String str; + randomString(&str, maxString); + pos2 = random(0, str.size()); + test.replace(pos1, pos1 + random(0, test.size() - pos1), + str, pos2, pos2 + random(0, str.size() - pos2)); + pos = random(0, test.size()); + if (avoidAliasing) { + test.replace(pos, random(0, test.size() - pos), + String(test).c_str(), test.size()); + } else { + test.replace(pos, random(0, test.size() - pos), + test.c_str(), test.size()); + } + pos = random(0, test.size()); + randomString(&str, maxString); + test.replace(pos, pos + random(0, test.size() - pos), + str.c_str(), str.size()); + pos = random(0, test.size()); + randomString(&str, maxString); + test.replace(pos, pos + random(0, test.size() - pos), + str.c_str()); + pos = random(0, test.size()); + test.replace(pos, random(0, test.size() - pos), + random(0, maxString), random('a', 'z')); + pos = random(0, test.size()); + if (avoidAliasing) { + test.replace( + test.begin() + pos, + test.begin() + pos + random(0, test.size() - pos), + String(test)); + } else { + test.replace( + test.begin() + pos, + test.begin() + pos + random(0, test.size() - pos), + test); + } + pos = random(0, test.size()); + if (avoidAliasing) { + test.replace( + test.begin() + pos, + test.begin() + pos + random(0, test.size() - pos), + String(test).c_str(), + test.size() - random(0, test.size())); + } else { + test.replace( + test.begin() + pos, + test.begin() + pos + random(0, test.size() - pos), + test.c_str(), + test.size() - random(0, test.size())); + } + pos = random(0, test.size()); + auto const n = random(0, test.size() - pos); + typename String::iterator b = test.begin(); + String str1; + randomString(&str1, maxString); + const String & str3 = str1; + const typename String::value_type* ss = str3.c_str(); + test.replace( + b + pos, + b + pos + n, + ss); + pos = random(0, test.size()); + test.replace( + test.begin() + pos, + test.begin() + pos + random(0, test.size() - pos), + random(0, maxString), random('a', 'z')); +} + +template void clause_21_3_5_p(String & test) { + std::vector + vec(random(0, maxString)); + test.copy( + &vec[0], + vec.size(), + random(0, test.size())); +} + +template void clause_21_3_5_q(String & test) { + String s; + randomString(&s, maxString); + s.swap(test); +} + +template void clause_21_3_6_a(String & test) { + // 21.3.6 string operations + // exercise c_str() and data() + assert(test.c_str() == test.data()); + // exercise get_allocator() + String s; + randomString(&s, maxString); + assert(test.get_allocator() == s.get_allocator()); +} + +template void 
clause_21_3_6_b(String & test) { + String str = test.substr( + random(0, test.size()), + random(0, test.size())); + Num2String(test, test.find(str, random(0, test.size()))); +} + +template void clause_21_3_6_c(String & test) { + auto from = random(0, test.size()); + auto length = random(0, test.size() - from); + String str = test.substr(from, length); + Num2String(test, test.find(str.c_str(), + random(0, test.size()), + random(0, str.size()))); +} + +template void clause_21_3_6_d(String & test) { + String str = test.substr( + random(0, test.size()), + random(0, test.size())); + Num2String(test, test.find(str.c_str(), + random(0, test.size()))); +} + +template void clause_21_3_6_e(String & test) { + Num2String(test, test.find( + random('a', 'z'), + random(0, test.size()))); +} + +template void clause_21_3_6_f(String & test) { + String str = test.substr( + random(0, test.size()), + random(0, test.size())); + Num2String(test, test.rfind(str, random(0, test.size()))); +} + +template void clause_21_3_6_g(String & test) { + String str = test.substr( + random(0, test.size()), + random(0, test.size())); + Num2String(test, test.rfind(str.c_str(), + random(0, test.size()), + random(0, str.size()))); +} + +template void clause_21_3_6_h(String & test) { + String str = test.substr( + random(0, test.size()), + random(0, test.size())); + Num2String(test, test.rfind(str.c_str(), + random(0, test.size()))); +} + +template void clause_21_3_6_i(String & test) { + Num2String(test, test.rfind( + random('a', 'z'), + random(0, test.size()))); +} + +template void clause_21_3_6_j(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_first_of(str, + random(0, test.size()))); +} + +template void clause_21_3_6_k(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_first_of(str.c_str(), + random(0, test.size()), + random(0, str.size()))); +} + +template void clause_21_3_6_l(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_first_of(str.c_str(), + random(0, test.size()))); +} + +template void clause_21_3_6_m(String & test) { + Num2String(test, test.find_first_of( + random('a', 'z'), + random(0, test.size()))); +} + +template void clause_21_3_6_n(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_last_of(str, + random(0, test.size()))); +} + +template void clause_21_3_6_o(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_last_of(str.c_str(), + random(0, test.size()), + random(0, str.size()))); +} + +template void clause_21_3_6_p(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_last_of(str.c_str(), + random(0, test.size()))); +} + +template void clause_21_3_6_q(String & test) { + Num2String(test, test.find_last_of( + random('a', 'z'), + random(0, test.size()))); +} + +template void clause_21_3_6_r(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_first_not_of(str, + random(0, test.size()))); +} + +template void clause_21_3_6_s(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_first_not_of(str.c_str(), + random(0, test.size()), + random(0, str.size()))); +} + +template void clause_21_3_6_t(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_first_not_of(str.c_str(), + random(0, test.size()))); +} + +template void clause_21_3_6_u(String & test) { + 
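+  // Each clause_21_3_6_* helper runs the same lookup on the std::string
+  // reference and on fbstring, then encodes the numeric result back into
+  // the string via Num2String so the two runs compare byte-for-byte;
+  // e.g. Num2String(test, 42) leaves test == "42".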
Num2String(test, test.find_first_not_of( + random('a', 'z'), + random(0, test.size()))); +} + +template void clause_21_3_6_v(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_last_not_of(str, + random(0, test.size()))); +} + +template void clause_21_3_6_w(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_last_not_of(str.c_str(), + random(0, test.size()), + random(0, str.size()))); +} + +template void clause_21_3_6_x(String & test) { + String str; + randomString(&str, maxString); + Num2String(test, test.find_last_not_of(str.c_str(), + random(0, test.size()))); +} + +template void clause_21_3_6_y(String & test) { + Num2String(test, test.find_last_not_of( + random('a', 'z'), + random(0, test.size()))); +} + +template void clause_21_3_6_z(String & test) { + test = test.substr(random(0, test.size()), random(0, test.size())); +} + +template void clause_21_3_7_a(String & test) { + String s; + randomString(&s, maxString); + int tristate = test.compare(s); + if (tristate > 0) tristate = 1; + else if (tristate < 0) tristate = 2; + Num2String(test, tristate); +} + +template void clause_21_3_7_b(String & test) { + String s; + randomString(&s, maxString); + int tristate = test.compare( + random(0, test.size()), + random(0, test.size()), + s); + if (tristate > 0) tristate = 1; + else if (tristate < 0) tristate = 2; + Num2String(test, tristate); +} + +template void clause_21_3_7_c(String & test) { + String str; + randomString(&str, maxString); + int tristate = test.compare( + random(0, test.size()), + random(0, test.size()), + str, + random(0, str.size()), + random(0, str.size())); + if (tristate > 0) tristate = 1; + else if (tristate < 0) tristate = 2; + Num2String(test, tristate); +} + +template void clause_21_3_7_d(String & test) { + String s; + randomString(&s, maxString); + int tristate = test.compare(s.c_str()); + if (tristate > 0) tristate = 1; + else if (tristate < 0) tristate = 2; + Num2String(test, tristate); +} + +template void clause_21_3_7_e(String & test) { + String str; + randomString(&str, maxString); + int tristate = test.compare( + random(0, test.size()), + random(0, test.size()), + str.c_str(), + random(0, str.size())); + if (tristate > 0) tristate = 1; + else if (tristate < 0) tristate = 2; + Num2String(test, tristate); +} + +template void clause_21_3_7_f(String & test) { + String s1; + randomString(&s1, maxString); + String s2; + randomString(&s2, maxString); + test = s1 + s2; +} + +template void clause_21_3_7_g(String & test) { + String s; + randomString(&s, maxString); + String s1; + randomString(&s1, maxString); + test = s.c_str() + s1; +} + +template void clause_21_3_7_h(String & test) { + String s; + randomString(&s, maxString); + test = typename String::value_type(random('a', 'z')) + s; +} + +template void clause_21_3_7_i(String & test) { + String s; + randomString(&s, maxString); + String s1; + randomString(&s1, maxString); + test = s + s1.c_str(); +} + +template void clause_21_3_7_j(String & test) { + String s; + randomString(&s, maxString); + String s1; + randomString(&s1, maxString); + test = s + s1.c_str(); +} + +template void clause_21_3_7_k(String & test) { + String s; + randomString(&s, maxString); + test = s + typename String::value_type(random('a', 'z')); +} + +// Numbering here is from C++11 +template void clause_21_4_8_9_a(String & test) { + stringstream s("asd asdfjhuhdf asdfasdf\tasdsdf"); + String str; + while (s) { + s >> str; + test += str + test; + } +} + +TEST(FBString, 
testAllClauses) { + EXPECT_TRUE(1) << "Starting with seed: " << seed; + std::string r; + folly::fbstring c; +#define TEST_CLAUSE(x) \ + do { \ + if (1) {} else EXPECT_TRUE(1) << "Testing clause " << #x; \ + randomString(&r); \ + c = r; \ + EXPECT_EQ(c, r); \ + auto localSeed = seed + count; \ + rng = RandomT(localSeed); \ + clause_##x(r); \ + rng = RandomT(localSeed); \ + clause_##x(c); \ + EXPECT_EQ(r, c) \ + << "Lengths: " << r.size() << " vs. " << c.size() \ + << "\nReference: '" << r << "'" \ + << "\nActual: '" << c.data()[0] << "'"; \ + } while (++count % 100 != 0) + + int count = 0; + TEST_CLAUSE(21_3_1_a); + TEST_CLAUSE(21_3_1_b); + TEST_CLAUSE(21_3_1_c); + TEST_CLAUSE(21_3_1_d); + TEST_CLAUSE(21_3_1_e); + TEST_CLAUSE(21_3_1_f); + TEST_CLAUSE(21_3_1_g); + + TEST_CLAUSE(21_3_2); + TEST_CLAUSE(21_3_3); + TEST_CLAUSE(21_3_4); + TEST_CLAUSE(21_3_5_a); + TEST_CLAUSE(21_3_5_b); + TEST_CLAUSE(21_3_5_c); + TEST_CLAUSE(21_3_5_d); + TEST_CLAUSE(21_3_5_e); + TEST_CLAUSE(21_3_5_f); + TEST_CLAUSE(21_3_5_g); + TEST_CLAUSE(21_3_5_h); + TEST_CLAUSE(21_3_5_i); + TEST_CLAUSE(21_3_5_j); + TEST_CLAUSE(21_3_5_k); + TEST_CLAUSE(21_3_5_l); + TEST_CLAUSE(21_3_5_m); + TEST_CLAUSE(21_3_5_n); + TEST_CLAUSE(21_3_5_o); + TEST_CLAUSE(21_3_5_p); + + TEST_CLAUSE(21_3_6_a); + TEST_CLAUSE(21_3_6_b); + TEST_CLAUSE(21_3_6_c); + TEST_CLAUSE(21_3_6_d); + TEST_CLAUSE(21_3_6_e); + TEST_CLAUSE(21_3_6_f); + TEST_CLAUSE(21_3_6_g); + TEST_CLAUSE(21_3_6_h); + TEST_CLAUSE(21_3_6_i); + TEST_CLAUSE(21_3_6_j); + TEST_CLAUSE(21_3_6_k); + TEST_CLAUSE(21_3_6_l); + TEST_CLAUSE(21_3_6_m); + TEST_CLAUSE(21_3_6_n); + TEST_CLAUSE(21_3_6_o); + TEST_CLAUSE(21_3_6_p); + TEST_CLAUSE(21_3_6_q); + TEST_CLAUSE(21_3_6_r); + TEST_CLAUSE(21_3_6_s); + TEST_CLAUSE(21_3_6_t); + TEST_CLAUSE(21_3_6_u); + TEST_CLAUSE(21_3_6_v); + TEST_CLAUSE(21_3_6_w); + TEST_CLAUSE(21_3_6_x); + TEST_CLAUSE(21_3_6_y); + TEST_CLAUSE(21_3_6_z); + + TEST_CLAUSE(21_3_7_a); + TEST_CLAUSE(21_3_7_b); + TEST_CLAUSE(21_3_7_c); + TEST_CLAUSE(21_3_7_d); + TEST_CLAUSE(21_3_7_e); + TEST_CLAUSE(21_3_7_f); + TEST_CLAUSE(21_3_7_g); + TEST_CLAUSE(21_3_7_h); + TEST_CLAUSE(21_3_7_i); + TEST_CLAUSE(21_3_7_j); + TEST_CLAUSE(21_3_7_k); + + TEST_CLAUSE(21_4_8_9_a); +} + +TEST(FBString, testGetline) { + fbstring s1 = "\ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras accumsan \n\ +elit ut urna consectetur in sagittis mi auctor. Nulla facilisi. In nec \n\ +dolor leo, vitae imperdiet neque. Donec ut erat mauris, a faucibus \n\ +elit. Integer consectetur gravida augue, sit amet mattis mauris auctor \n\ +sed. Morbi congue libero eu nunc sodales adipiscing. In lectus nunc, \n\ +vulputate a fringilla at, venenatis quis justo. Proin eu velit \n\ +nibh. Maecenas vitae tellus eros. Pellentesque habitant morbi \n\ +tristique senectus et netus et malesuada fames ac turpis \n\ +egestas. Vivamus faucibus feugiat consequat. Donec fermentum neque sit \n\ +amet ligula suscipit porta. Phasellus facilisis felis in purus luctus \n\ +quis posuere leo tempor. Nam nunc purus, luctus a pharetra ut, \n\ +placerat at dui. Donec imperdiet, diam quis convallis pulvinar, dui \n\ +est commodo lorem, ut tincidunt diam nibh et nibh. Maecenas nec velit \n\ +massa, ut accumsan magna. Donec imperdiet tempor nisi et \n\ +laoreet. Phasellus lectus quam, ultricies ut tincidunt in, dignissim \n\ +id eros. Mauris vulputate tortor nec neque pellentesque sagittis quis \n\ +sed nisl. 
In diam lacus, lobortis ut posuere nec, ornare id quam."; + const char* f = "/tmp/fbstring_testing"; + { + std::ofstream out(f); + if (!(out << s1)) { + EXPECT_TRUE(0) << "Couldn't write to temp file."; + return; + } + } + vector v; + boost::split(v, s1, boost::is_any_of("\n")); + ifstream input(f); + fbstring line; + FOR_EACH (i, v) { + EXPECT_TRUE(getline(input, line)); + EXPECT_EQ(line, *i); + } +} + +TEST(FBString, testMoveCtor) { + // Move constructor. Make sure we allocate a large string, so the + // small string optimization doesn't kick in. + auto size = random(100, 2000); + fbstring s(size, 'a'); + fbstring test = std::move(s); + EXPECT_TRUE(s.empty()); + EXPECT_EQ(size, test.size()); +} + +TEST(FBString, testMoveAssign) { + // Move constructor. Make sure we allocate a large string, so the + // small string optimization doesn't kick in. + auto size = random(100, 2000); + fbstring s(size, 'a'); + fbstring test; + test = std::move(s); + EXPECT_TRUE(s.empty()); + EXPECT_EQ(size, test.size()); +} + +TEST(FBString, testMoveOperatorPlusLhs) { + // Make sure we allocate a large string, so the + // small string optimization doesn't kick in. + auto size1 = random(100, 2000); + auto size2 = random(100, 2000); + fbstring s1(size1, 'a'); + fbstring s2(size2, 'b'); + fbstring test; + test = std::move(s1) + s2; + EXPECT_TRUE(s1.empty()); + EXPECT_EQ(size1 + size2, test.size()); +} + +TEST(FBString, testMoveOperatorPlusRhs) { + // Make sure we allocate a large string, so the + // small string optimization doesn't kick in. + auto size1 = random(100, 2000); + auto size2 = random(100, 2000); + fbstring s1(size1, 'a'); + fbstring s2(size2, 'b'); + fbstring test; + test = s1 + std::move(s2); + EXPECT_EQ(size1 + size2, test.size()); +} + +TEST(FBString, testConstructionFromLiteralZero) { + try { + std::string s(0); + EXPECT_TRUE(false); + } catch (const std::logic_error&) { + } catch (...) { + EXPECT_TRUE(false); + } + + try { + fbstring s(0); + EXPECT_TRUE(false); + } catch (const std::logic_error& e) { + } catch (...) 
{ + EXPECT_TRUE(false); + } +} + +TEST(FBString, testFixedBugs) { + { // D479397 + fbstring str(1337, 'f'); + fbstring cp = str; + cp.clear(); + cp.c_str(); + EXPECT_EQ(str.front(), 'f'); + } + { // D481173, --extra-cxxflags=-DFBSTRING_CONSERVATIVE + fbstring str(1337, 'f'); + for (int i = 0; i < 2; ++i) { + fbstring cp = str; + cp[1] = 'b'; + EXPECT_EQ(cp.c_str()[cp.size()], '\0'); + cp.push_back('?'); + } + } +} + +#define CONCAT(A, B) CONCAT_HELPER(A, B) +#define CONCAT_HELPER(A, B) A##B +#define BENCHFUN(F) CONCAT(CONCAT(BM_, F), CONCAT(_, STRING)) + +#define STRING string +#include "folly/test/FBStringTestBenchmarks.cpp.h" +#undef STRING +#define STRING fbstring +#include "folly/test/FBStringTestBenchmarks.cpp.h" +#undef STRING + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret && FLAGS_benchmark) { + folly::runBenchmarks(); + } + + return ret; +} + +/* +malloc + +BENCHFUN(defaultCtor) 100000 1.426 s 14.26 us 68.47 k +BM_copyCtor_string/32k 100000 63.48 ms 634.8 ns 1.502 M +BM_ctorFromArray_string/32k 100000 303.3 ms 3.033 us 321.9 k +BM_ctorFromChar_string/1M 100000 9.915 ms 99.15 ns 9.619 M +BM_assignmentOp_string/256 100000 69.09 ms 690.9 ns 1.38 M +BENCHFUN(assignmentFill) 100000 1.775 ms 17.75 ns 53.73 M +BM_resize_string/512k 100000 1.667 s 16.67 us 58.58 k +BM_findSuccessful_string/512k 100000 287.3 ms 2.873 us 339.9 k +BM_findUnsuccessful_string/512k 100000 320.3 ms 3.203 us 304.9 k +BM_replace_string/256 100000 69.68 ms 696.8 ns 1.369 M +BM_push_back_string/1k 100000 433.1 ms 4.331 us 225.5 k + +BENCHFUN(defaultCtor) 100000 1.086 s 10.86 us 89.91 k +BM_copyCtor_fbstring/32k 100000 4.218 ms 42.18 ns 22.61 M +BM_ctorFromArray_fbstring/32k 100000 145.2 ms 1.452 us 672.7 k +BM_ctorFromChar_fbstring/1M 100000 9.21 ms 92.1 ns 10.35 M +BM_assignmentOp_fbstring/256 100000 61.95 ms 619.5 ns 1.54 M +BENCHFUN(assignmentFill) 100000 1.41 ms 14.1 ns 67.64 M +BM_resize_fbstring/512k 100000 1.668 s 16.68 us 58.56 k +BM_findSuccessful_fbstring/512k 100000 20.6 ms 206 ns 4.629 M +BM_findUnsuccessful_fbstring/512k 100000 141.3 ms 1.413 us 691.1 k +BM_replace_fbstring/256 100000 77.12 ms 771.2 ns 1.237 M +BM_push_back_fbstring/1k 100000 1.745 s 17.45 us 55.95 k + +jemalloc + +BENCHFUN(defaultCtor) 100000 1.426 s 14.26 us 68.5 k +BM_copyCtor_string/32k 100000 275.7 ms 2.757 us 354.2 k +BM_ctorFromArray_string/32k 100000 270 ms 2.7 us 361.7 k +BM_ctorFromChar_string/1M 100000 10.36 ms 103.6 ns 9.206 M +BM_assignmentOp_string/256 100000 70.44 ms 704.3 ns 1.354 M +BENCHFUN(assignmentFill) 100000 1.766 ms 17.66 ns 54 M +BM_resize_string/512k 100000 1.675 s 16.75 us 58.29 k +BM_findSuccessful_string/512k 100000 90.89 ms 908.9 ns 1.049 M +BM_findUnsuccessful_string/512k 100000 315.1 ms 3.151 us 309.9 k +BM_replace_string/256 100000 71.14 ms 711.4 ns 1.341 M +BM_push_back_string/1k 100000 425.1 ms 4.251 us 229.7 k + +BENCHFUN(defaultCtor) 100000 1.082 s 10.82 us 90.23 k +BM_copyCtor_fbstring/32k 100000 4.213 ms 42.13 ns 22.64 M +BM_ctorFromArray_fbstring/32k 100000 113.2 ms 1.132 us 863 k +BM_ctorFromChar_fbstring/1M 100000 9.162 ms 91.62 ns 10.41 M +BM_assignmentOp_fbstring/256 100000 61.34 ms 613.4 ns 1.555 M +BENCHFUN(assignmentFill) 100000 1.408 ms 14.08 ns 67.73 M +BM_resize_fbstring/512k 100000 1.671 s 16.71 us 58.43 k +BM_findSuccessful_fbstring/512k 100000 8.723 ms 87.23 ns 10.93 M +BM_findUnsuccessful_fbstring/512k 100000 141.3 ms 1.413 us 691.2 k +BM_replace_fbstring/256 
100000 77.83 ms 778.3 ns 1.225 M +BM_push_back_fbstring/1k 100000 1.744 s 17.44 us 55.99 k +*/ diff --git a/folly/test/FBStringTestBenchmarks.cpp.h b/folly/test/FBStringTestBenchmarks.cpp.h new file mode 100644 index 00000000..15ddc650 --- /dev/null +++ b/folly/test/FBStringTestBenchmarks.cpp.h @@ -0,0 +1,224 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This file is supposed to be included from within + * FBStringTest. Do not use otherwise. + */ + +void BENCHFUN(initRNG)(int iters, int) { + srand(seed); +} +BENCHMARK_PARAM(BENCHFUN(initRNG), 0); + +void BENCHFUN(defaultCtor)(int iters, int) { + FOR_EACH_RANGE (i, 0, iters) { + STRING s[4096]; + doNotOptimizeAway(&s); + } +} +BENCHMARK_PARAM(BENCHFUN(defaultCtor), 0); + +void BENCHFUN(copyCtor)(int iters, int arg) { + STRING s; + BENCHMARK_SUSPEND { + randomString(&s, arg); + } + FOR_EACH_RANGE (i, 0, iters) { + STRING s1 = s; + doNotOptimizeAway(&s1); + } +} +BENCHMARK_PARAM(BENCHFUN(copyCtor), 32768); + +void BENCHFUN(ctorFromArray)(int iters, int arg) { + STRING s; + BENCHMARK_SUSPEND { + randomString(&s, arg); + if (s.empty()) { + s = "This is rare."; + } + } + FOR_EACH_RANGE (i, 0, iters) { + STRING s1(s.data(), s.size()); + doNotOptimizeAway(&s1); + } +} +BENCHMARK_PARAM(BENCHFUN(ctorFromArray), 32768); + +void BENCHFUN(ctorFromTwoPointers)(int iters, int arg) { + static STRING s; + BENCHMARK_SUSPEND { + if (s.size() < arg) s.resize(arg); + } + FOR_EACH_RANGE (i, 0, iters) { + STRING s1(s.begin(), s.end()); + doNotOptimizeAway(&s1); + } +} +BENCHMARK_PARAM(BENCHFUN(ctorFromTwoPointers), 0); +BENCHMARK_PARAM(BENCHFUN(ctorFromTwoPointers), 7); +BENCHMARK_PARAM(BENCHFUN(ctorFromTwoPointers), 15); +BENCHMARK_PARAM(BENCHFUN(ctorFromTwoPointers), 23); +BENCHMARK_PARAM(BENCHFUN(ctorFromTwoPointers), 24); + +void BENCHFUN(ctorFromChar)(int iters, int arg) { + FOR_EACH_RANGE (i, 0, iters) { + STRING s1('a', arg); + doNotOptimizeAway(&s1); + } +} +BENCHMARK_PARAM(BENCHFUN(ctorFromChar), 1048576); + +void BENCHFUN(assignmentOp)(int iters, int arg) { + STRING s; + BENCHMARK_SUSPEND { + randomString(&s, arg); + } + FOR_EACH_RANGE (i, 0, iters) { + STRING s1; + BENCHMARK_SUSPEND { + randomString(&s1, arg); + doNotOptimizeAway(&s1); + } + s1 = s; + } +} +BENCHMARK_PARAM(BENCHFUN(assignmentOp), 256); + +void BENCHFUN(assignmentFill)(int iters, int) { + STRING s; + FOR_EACH_RANGE (i, 0, iters) { + s = static_cast(i); + doNotOptimizeAway(&s); + } +} +BENCHMARK_PARAM(BENCHFUN(assignmentFill), 0); + +void BENCHFUN(resize)(int iters, int arg) { + STRING s; + FOR_EACH_RANGE (i, 0, iters) { + s.resize(random(0, arg)); + doNotOptimizeAway(&s); + } +} +BENCHMARK_PARAM(BENCHFUN(resize), 524288); + +void BENCHFUN(findSuccessful)(int iters, int arg) { + size_t pos, len; + STRING s; + + BENCHMARK_SUSPEND { + + // Text courtesy (ahem) of + // http://www.psychologytoday.com/blog/career-transitions/200906/ + // the-dreaded-writing-sample + s = "\ +Even if you've mastered the art of the cover letter 
and the resume, \ +another part of the job search process can trip up an otherwise \ +qualified candidate: the writing sample.\n\ +\n\ +Strong writing and communication skills are highly sought after by \ +most employers. Whether crafting short emails or lengthy annual \ +reports, many workers use their writing skills every day. And for an \ +employer seeking proof behind that ubiquitous candidate \ +phrase,\"excellent communication skills\", a required writing sample \ +is invaluable.\n\ +\n\ +Writing samples need the same care and attention given to cover \ +letters and resumes. Candidates with otherwise impeccable credentials \ +are routinely eliminated by a poorly chosen writing sample. Notice I \ +said \"poorly chosen\" not \"poorly written.\" Because that's the rub: \ +a writing sample not only reveals the individual's writing skills, it \ +also offers a peek into what they consider important or relevant for \ +the position. If you miss that mark with your writing sample, don't \ +expect to get a call for an interview."; + + pos = random(0, s.size()); + len = random(0, s.size() - pos); + } + FOR_EACH_RANGE (i, 0, iters) { + doNotOptimizeAway(s.find(s.data(), pos, len)); + } +} +BENCHMARK_PARAM(BENCHFUN(findSuccessful), 524288); + +void BENCHFUN(findUnsuccessful)(int iters, int arg) { + STRING s, s1; + + BENCHMARK_SUSPEND { + s = "\ +Even if you've mastered the art of the cover letter and the resume, \ +another part of the job search process can trip up an otherwise \ +qualified candidate: the writing sample.\n\ +\n\ +Strong writing and communication skills are highly sought after by \ +most employers. Whether crafting short emails or lengthy annual \ +reports, many workers use their writing skills every day. And for an \ +employer seeking proof behind that ubiquitous candidate \ +phrase,\"excellent communication skills\", a required writing sample \ +is invaluable.\n\ +\n\ +Writing samples need the same care and attention given to cover \ +letters and resumes. Candidates with otherwise impeccable credentials \ +are routinely eliminated by a poorly chosen writing sample. Notice I \ +said \"poorly chosen\" not \"poorly written.\" Because that's the rub: \ +a writing sample not only reveals the individual's writing skills, it \ +also offers a peek into what they consider important or relevant for \ +the position. 
If you miss that mark with your writing sample, don't \ +expect to get a call for an interview."; + + s1 = "So how do you tackle that writing sample request?"; + } + + FOR_EACH_RANGE (i, 0, iters) { + doNotOptimizeAway(s.find(s1)); + } +} +BENCHMARK_PARAM(BENCHFUN(findUnsuccessful), 524288); + +void BENCHFUN(replace)(int iters, int arg) { + STRING s; + BENCHMARK_SUSPEND { + randomString(&s, arg); + } + FOR_EACH_RANGE (i, 0, iters) { + BenchmarkSuspender susp; + doNotOptimizeAway(&s); + auto const pos = random(0, s.size()); + auto toRemove = random(0, s.size() - pos); + auto toInsert = random(0, arg); + STRING s1; + randomString(&s1, toInsert); + susp.dismiss(); + + s.replace(pos, toRemove, s1); + } +} +BENCHMARK_PARAM(BENCHFUN(replace), 256); + +void BENCHFUN(push_back)(int iters, int arg) { + FOR_EACH_RANGE (i, 0, iters) { + STRING s; + FOR_EACH_RANGE (j, 0, arg) { + s += ' '; + } + } +} +BENCHMARK_PARAM(BENCHFUN(push_back), 1); +BENCHMARK_PARAM(BENCHFUN(push_back), 23); +BENCHMARK_PARAM(BENCHFUN(push_back), 127); +BENCHMARK_PARAM(BENCHFUN(push_back), 1024); diff --git a/folly/test/FBVectorTest.cpp b/folly/test/FBVectorTest.cpp new file mode 100644 index 00000000..aee0831c --- /dev/null +++ b/folly/test/FBVectorTest.cpp @@ -0,0 +1,259 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// +// Author: andrei.alexandrescu@fb.com + +#include "folly/Traits.h" +#include "folly/Random.h" +#include "folly/FBString.h" +#include "folly/FBVector.h" +#include "folly/Benchmark.h" + +#include + +#include +#include +#include +#include + +using namespace std; +using namespace folly; + +auto static const seed = randomNumberSeed(); +typedef boost::mt19937 RandomT; +static RandomT rng(seed); +static const size_t maxString = 100; +static const bool avoidAliasing = true; + +template +Integral2 random(Integral1 low, Integral2 up) { + boost::uniform_int<> range(low, up); + return range(rng); +} + +template +void randomString(String* toFill, unsigned int maxSize = 1000) { + assert(toFill); + toFill->resize(random(0, maxSize)); + FOR_EACH (i, *toFill) { + *i = random('a', 'z'); + } +} + +template +void Num2String(String& str, Integral n) { + str.resize(10, '\0'); +// ultoa((unsigned long)n, &str[0], 10); + sprintf(&str[0], "%ul", 10); + str.resize(strlen(str.c_str())); +} + +std::list RandomList(unsigned int maxSize) { + std::list lst(random(0u, maxSize)); + std::list::iterator i = lst.begin(); + for (; i != lst.end(); ++i) { + *i = random('a', 'z'); + } + return lst; +} + +template T randomObject(); + +template<> int randomObject() { + return random(0, 1024); +} + +template<> folly::fbstring randomObject() { + folly::fbstring result; + randomString(&result); + return result; +} + +//////////////////////////////////////////////////////////////////////////////// +// Tests begin here +//////////////////////////////////////////////////////////////////////////////// + +TEST(fbvector, clause_23_3_6_1_3_ambiguity) { + fbvector v(10, 20); + EXPECT_EQ(v.size(), 10); + FOR_EACH (i, v) { + EXPECT_EQ(*i, 20); + } +} + +TEST(fbvector, clause_23_3_6_1_11_ambiguity) { + fbvector v; + v.assign(10, 20); + EXPECT_EQ(v.size(), 10); + FOR_EACH (i, v) { + EXPECT_EQ(*i, 20); + } +} + +TEST(fbvector, clause_23_3_6_2_6) { + fbvector v; + auto const n = random(0U, 10000U); + v.reserve(n); + auto const n1 = random(0U, 10000U); + auto const obj = randomObject(); + v.assign(n1, obj); + v.shrink_to_fit(); + // Nothing to verify except that the call made it through +} + +TEST(fbvector, clause_23_3_6_4_ambiguity) { + fbvector v; + fbvector::const_iterator i = v.end(); + v.insert(i, 10, 20); + EXPECT_EQ(v.size(), 10); + FOR_EACH (i, v) { + EXPECT_EQ(*i, 20); + } +} + +TEST(fbvector, composition) { + fbvector< fbvector > matrix(100, fbvector(100)); +} + +TEST(fbvector, works_with_std_string) { + fbvector v(10, "hello"); + EXPECT_EQ(v.size(), 10); + v.push_back("world"); +} + +namespace { +class UserDefinedType { int whatevs_; }; +} + +FOLLY_ASSUME_FBVECTOR_COMPATIBLE(UserDefinedType); + +TEST(fbvector, works_with_user_defined_type) { + fbvector v(10); + EXPECT_EQ(v.size(), 10); + v.push_back(UserDefinedType()); +} + +TEST(fbvector, move_construction) { + fbvector v1(100, 100); + fbvector v2; + EXPECT_EQ(v1.size(), 100); + EXPECT_EQ(v1.front(), 100); + EXPECT_EQ(v2.size(), 0); + v2 = std::move(v1); + EXPECT_EQ(v1.size(), 0); + EXPECT_EQ(v2.size(), 100); + EXPECT_EQ(v2.front(), 100); + + v1.assign(100, 100); + auto other = std::move(v1); + EXPECT_EQ(v1.size(), 0); + EXPECT_EQ(other.size(), 100); + EXPECT_EQ(other.front(), 100); +} + +TEST(fbvector, emplace) { + fbvector s(12, "asd"); + EXPECT_EQ(s.size(), 12); + EXPECT_EQ(s.front(), "asd"); + s.emplace_back("funk"); + EXPECT_EQ(s.back(), "funk"); +} + +TEST(fbvector, initializer_lists) { + fbvector vec = { 1, 2, 3 }; + EXPECT_EQ(vec.size(), 3); + EXPECT_EQ(vec[0], 1); + 
EXPECT_EQ(vec[1], 2); + EXPECT_EQ(vec[2], 3); + + vec = { 0, 0, 12, 16 }; + EXPECT_EQ(vec.size(), 4); + EXPECT_EQ(vec[0], 0); + EXPECT_EQ(vec[1], 0); + EXPECT_EQ(vec[2], 12); + EXPECT_EQ(vec[3], 16); + + vec.insert(vec.begin() + 1, { 23, 23 }); + EXPECT_EQ(vec.size(), 6); + EXPECT_EQ(vec[0], 0); + EXPECT_EQ(vec[1], 23); + EXPECT_EQ(vec[2], 23); + EXPECT_EQ(vec[3], 0); + EXPECT_EQ(vec[4], 12); + EXPECT_EQ(vec[5], 16); +} + +TEST(fbvector, unique_ptr) { + fbvector > v(12); + std::unique_ptr p(new int(12)); + v.push_back(std::move(p)); + EXPECT_EQ(*v.back(), 12); + + v[0] = std::move(p); + EXPECT_FALSE(v[0].get()); + v[0].reset(new int(32)); + std::unique_ptr somePtr; + v.insert(v.begin(), std::move(somePtr)); + EXPECT_EQ(*v[1], 32); +} + +TEST(FBVector, task858056) { + fbvector cycle; + cycle.push_back("foo"); + cycle.push_back("bar"); + cycle.push_back("baz"); + fbstring message("Cycle detected: "); + FOR_EACH_R (node_name, cycle) { + message += "["; + message += *node_name; + message += "] "; + } + EXPECT_EQ("Cycle detected: [baz] [bar] [foo] ", message); +} + +#define CONCAT(A, B) CONCAT_HELPER(A, B) +#define CONCAT_HELPER(A, B) A##B +#define BENCHFUN(F) CONCAT(CONCAT(BM_, F), CONCAT(_, VECTOR)) +#define TESTFUN(F) TEST(fbvector, CONCAT(F, VECTOR)) + +typedef vector IntVector; +typedef fbvector IntFBVector; +typedef vector FBStringVector; +typedef fbvector FBStringFBVector; + +#define VECTOR IntVector +#include "folly/test/FBVectorTestBenchmarks.cpp.h" +#undef VECTOR +#define VECTOR IntFBVector +#include "folly/test/FBVectorTestBenchmarks.cpp.h" +#undef VECTOR +#define VECTOR FBStringVector +#include "folly/test/FBVectorTestBenchmarks.cpp.h" +#undef VECTOR +#define VECTOR FBStringFBVector +#include "folly/test/FBVectorTestBenchmarks.cpp.h" +#undef VECTOR + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret && FLAGS_benchmark) { + folly::runBenchmarks(); + } + return ret; +} diff --git a/folly/test/FBVectorTestBenchmarks.cpp.h b/folly/test/FBVectorTestBenchmarks.cpp.h new file mode 100644 index 00000000..e0ece20c --- /dev/null +++ b/folly/test/FBVectorTestBenchmarks.cpp.h @@ -0,0 +1,379 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This file is supposed to be included from within + * FBVectorTest. Do not use otherwise. 
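+ *
+ * The including file defines VECTOR to each container under test, e.g.:
+ *
+ *   #define VECTOR IntFBVector
+ *   #include "folly/test/FBVectorTestBenchmarks.cpp.h"
+ *   #undef VECTOR
+ *
+ * so every TESTFUN/BENCHFUN below is stamped out once per container type.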
+ */ + +TESTFUN(clause_23_3_6_1_1) { + VECTOR v; + EXPECT_TRUE(v.empty()); + VECTOR::allocator_type a; + VECTOR v1(a); + EXPECT_TRUE(v1.empty()); +} + +TESTFUN(clause_23_3_6_1_3) { + auto const n = random(0U, 10000U); + VECTOR v(n); + EXPECT_EQ(v.size(), n); + FOR_EACH (i, v) { + EXPECT_EQ(*i, VECTOR::value_type()); + } +} + +TESTFUN(clause_23_3_6_1_9) { + // Insert with iterators + list lst; + auto const n = random(0U, 10000U); + FOR_EACH_RANGE (i, 0, n) { + lst.push_back(randomObject()); + } + VECTOR v(lst.begin(), lst.end()); + EXPECT_EQ(v.size(), lst.size()); + size_t j = 0; + FOR_EACH (i, lst) { + EXPECT_EQ(v[j++], *i); + } +} + +TESTFUN(clause_23_3_6_1_11) { + // assign with iterators + list lst; + auto const n = random(0U, 10000U); + FOR_EACH_RANGE (i, 0, n) { + lst.push_back(randomObject()); + } + VECTOR v; + v.assign(lst.begin(), lst.end()); + EXPECT_EQ(v.size(), lst.size()); + size_t j = 0; + FOR_EACH (i, lst) { + EXPECT_EQ(v[j++], *i); + } + + // aliased assign + v.assign(v.begin(), v.begin() + v.size() / 2); + EXPECT_EQ(v.size(), lst.size() / 2); + j = 0; + FOR_EACH (i, lst) { + if (j == v.size()) break; + EXPECT_EQ(v[j++], *i); + } +} + +TESTFUN(clause_23_3_6_1_12) { + VECTOR v; + auto const n = random(0U, 10000U); + auto const obj = randomObject(); + v.assign(n, obj); + EXPECT_EQ(v.size(), n); + FOR_EACH (i, v) { + EXPECT_EQ(*i, obj); + } +} + +TESTFUN(clause_23_3_6_2_1) { + VECTOR v; + auto const n = random(0U, 10000U); + v.reserve(n); + EXPECT_GE(v.capacity(), n); +} + +TESTFUN(clause_23_3_6_2_7) { + auto const n1 = random(0U, 10000U); + auto const n2 = random(0U, 10000U); + auto const obj1 = randomObject(); + auto const obj2 = randomObject(); + VECTOR v1(n1, obj1), v2(n2, obj2); + v1.swap(v2); + EXPECT_EQ(v1.size(), n2); + EXPECT_EQ(v2.size(), n1); + FOR_EACH (i, v1) { + EXPECT_EQ(*i, obj2); + } + FOR_EACH (i, v2) { + EXPECT_EQ(*i, obj1); + } +} + +TESTFUN(clause_23_3_6_2_9) { + VECTOR v; + auto const n1 = random(0U, 10000U); + v.resize(n1); + FOR_EACH (i, v) { + EXPECT_EQ(*i, VECTOR::value_type()); + } + auto const n2 = random(0U, 10000U); + FOR_EACH (i, v) { + EXPECT_EQ(*i, VECTOR::value_type()); + } +} + +TESTFUN(clause_23_3_6_2_11) { + VECTOR v; + auto const n1 = random(0U, 10000U); + auto const obj1 = randomObject(); + v.resize(n1, obj1); + FOR_EACH (i, v) { + EXPECT_EQ(*i, obj1); + } + auto const n2 = random(0U, 10000U); + auto const obj2 = randomObject(); + v.resize(n2, obj2); + if (n1 < n2) { + FOR_EACH_RANGE (i, n1, n2) { + EXPECT_EQ(v[i], obj2); + } + } +} + +TESTFUN(clause_absent_element_access) { + VECTOR v; + auto const n1 = random(1U, 10000U); + auto const obj1 = randomObject(); + v.resize(n1, obj1); + auto const n = random(0U, v.size() - 1); + EXPECT_EQ(v[n], v.at(n)); + auto const obj2 = randomObject(); + v[n] = obj2; + EXPECT_EQ(v[n], v.at(n)); + EXPECT_EQ(v[n], obj2); + auto const obj3 = randomObject(); + v.at(n) = obj3; + EXPECT_EQ(v[n], v.at(n)); + EXPECT_EQ(v[n], obj3); +} + +TESTFUN(clause_23_3_6_3_1) { + VECTOR v; + auto const n1 = random(1U, 10000U); + auto const obj1 = randomObject(); + v.resize(n1, obj1); + EXPECT_EQ(v.data(), &v.front()); +} + +TESTFUN(clause_23_3_6_4_1_a) { + VECTOR v, w; + auto const n1 = random(1U, 10000U); + FOR_EACH_RANGE (i, 0, n1) { + auto const obj1 = randomObject(); + v.push_back(obj1); + w.push_back(obj1); + } + auto const n2 = random(0U, n1 - 1); + auto pos = v.begin() + n2; + auto const obj2 = randomObject(); + + auto r = v.insert(pos, obj2); + + EXPECT_EQ(v.size(), w.size() + 1); + EXPECT_EQ(r - v.begin(), n2); + 
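+  // The checks below pin down the full contract of single-element
+  // insert: the returned iterator refers to the inserted element, the
+  // prefix is untouched, and the suffix is the old tail shifted right
+  // by one position.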
EXPECT_EQ(*r, obj2); + FOR_EACH_RANGE (i, 0, r - v.begin()) { + EXPECT_EQ(v[i], w[i]); + } + FOR_EACH_RANGE (i, r - v.begin() + 1, v.size()) { + EXPECT_EQ(v[i], w[i - 1]); + } +} + +TESTFUN(clause_23_3_6_4_1_c) { + // This test only works for fbvector + fbvector v, w; + auto const n1 = random(1U, 10000U); + FOR_EACH_RANGE (i, 0, n1) { + auto const obj1 = randomObject(); + v.push_back(obj1); + w.push_back(obj1); + } + auto const n2 = random(0U, n1-1); + auto pos = v.begin() + n2; + auto const obj2 = randomObject(); + auto const n3 = random(0U, 10000U); + + auto r = v.insert(pos, n3, obj2); + + EXPECT_EQ(v.size(), w.size() + n3); + EXPECT_EQ(r - v.begin(), n2); + FOR_EACH_RANGE (i, 0, r - v.begin()) { + EXPECT_EQ(v[i], w[i]); + } + FOR_EACH_RANGE (i, r - v.begin(), r - v.begin() + n3) { + EXPECT_EQ(v[i], obj2); + } + FOR_EACH_RANGE (i, r - v.begin() + n3, v.size()) { + EXPECT_EQ(v[i], w[i - n3]); + } +} + +TESTFUN(clause_23_3_6_4_1_d) { + VECTOR v, w; + auto const n1 = random(0U, 10000U); + FOR_EACH_RANGE (i, 0, n1) { + auto const obj1 = randomObject(); + v.push_back(obj1); + w.push_back(obj1); + } + EXPECT_EQ(v.size(), n1); + + auto const obj2 = randomObject(); + v.push_back(obj2); + EXPECT_EQ(v.back(), obj2); + EXPECT_EQ(v.size(), w.size() + 1); + + FOR_EACH_RANGE (i, 0, w.size()) { + EXPECT_EQ(v[i], w[i]); + } +} + +TESTFUN(clause_23_3_6_4_3) { + VECTOR v, w; + auto const n1 = random(1U, 10000U); + FOR_EACH_RANGE (i, 0, n1) { + auto const obj1 = randomObject(); + v.push_back(obj1); + w.push_back(obj1); + } + EXPECT_EQ(v.size(), n1); + + auto const n2 = random(0U, n1 - 1); + auto it = v.erase(v.begin() + n2); + EXPECT_EQ(v.size() + 1, w.size()); + + FOR_EACH_RANGE (i, 0, it - v.begin()) { + EXPECT_EQ(v[i], w[i]); + } + + FOR_EACH_RANGE (i, it - v.begin(), v.size()) { + EXPECT_EQ(v[i], w[i + 1]); + } +} + +TESTFUN(clause_23_3_6_4_4) { + VECTOR v, w; + auto const n1 = random(1U, 10000U); + FOR_EACH_RANGE (i, 0, n1) { + auto const obj1 = randomObject(); + v.push_back(obj1); + w.push_back(obj1); + } + EXPECT_EQ(v.size(), n1); + + auto const n2 = random(0U, n1 - 1); + auto const n3 = random(n2, n1 - 1); + auto it = v.erase(v.begin() + n2, v.begin() + n3); + EXPECT_EQ(v.size() + (n3 - n2), w.size()); + + FOR_EACH_RANGE (i, 0, it - v.begin()) { + EXPECT_EQ(v[i], w[i]); + } + + FOR_EACH_RANGE (i, it - v.begin(), v.size()) { + EXPECT_EQ(v[i], w[i + (n3 - n2)]); + } +} + +TESTFUN(clause_23_3_6_4_clear) { + VECTOR v; + v.clear(); + EXPECT_TRUE(v.empty()); + v.resize(random(0U, 10000U)); + auto c = v.capacity(); + v.clear(); + EXPECT_TRUE(v.empty()); + EXPECT_EQ(v.capacity(), c); +} + +BENCHMARK(BENCHFUN(zzInitRNG), iters) { + //LOG(INFO) << "\nTesting with type " << typeid(VECTOR).name() << "\n"; + srand(seed); +} + +BENCHMARK(BENCHFUN(defaultCtor), iters) { + FOR_EACH_RANGE (i, 0, iters) { + VECTOR v[4096]; + doNotOptimizeAway(&v); + } +} + +void BENCHFUN(sizeCtor)(int iters, int size) { + FOR_EACH_RANGE (i, 0, iters) { + VECTOR v(size); + doNotOptimizeAway(&v); + } +} +BENCHMARK_PARAM(BENCHFUN(sizeCtor), 128); +BENCHMARK_PARAM(BENCHFUN(sizeCtor), 1024); +BENCHMARK_PARAM(BENCHFUN(sizeCtor), 1048576); + +void BENCHFUN(fillCtor)(int iters, int size) { + FOR_EACH_RANGE (i, 0, iters) { + VECTOR v(size_t(size), randomObject()); + doNotOptimizeAway(&v); + } +} +BENCHMARK_PARAM(BENCHFUN(fillCtor), 128); +BENCHMARK_PARAM(BENCHFUN(fillCtor), 1024); +BENCHMARK_PARAM(BENCHFUN(fillCtor), 10240); + +void BENCHFUN(pushBack)(int iters, int size) { + auto const obj = randomObject(); + FOR_EACH_RANGE (i, 0, iters) { 
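+    // A fresh vector on every outer iteration: this measures amortized
+    // push_back cost including all intermediate reallocations, not the
+    // steady state of a pre-sized buffer.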
+ VECTOR v; + FOR_EACH_RANGE (j, 0, size) { + v.push_back(obj); + } + } +} +BENCHMARK_PARAM(BENCHFUN(pushBack), 128); +BENCHMARK_PARAM(BENCHFUN(pushBack), 1024); +BENCHMARK_PARAM(BENCHFUN(pushBack), 10240); +BENCHMARK_PARAM(BENCHFUN(pushBack), 102400); +BENCHMARK_PARAM(BENCHFUN(pushBack), 512000); + +void BENCHFUN(reserve)(int iters, int size) { + auto const obj = randomObject(); + VECTOR v(random(0U, 10000U), obj); + FOR_EACH_RANGE (i, 0, iters) { + v.reserve(random(0U, 100000U)); + } +} +BENCHMARK_PARAM(BENCHFUN(reserve), 128); +BENCHMARK_PARAM(BENCHFUN(reserve), 1024); +BENCHMARK_PARAM(BENCHFUN(reserve), 10240); + +void BENCHFUN(insert)(int iters, int size) { + auto const obj1 = randomObject(); + auto const obj2 = randomObject(); + VECTOR v(random(0U, 1U), obj1); + FOR_EACH_RANGE (i, 0, iters / 100) { + v.insert(v.begin(), obj2); + } +} +BENCHMARK_PARAM(BENCHFUN(insert), 100); + +void BENCHFUN(erase)(int iters, int size) { + auto const obj1 = randomObject(); + VECTOR v(random(0U, 100U), obj1); + FOR_EACH_RANGE (i, 0, iters) { + if (v.empty()) continue; + v.erase(v.begin()); + } +} +BENCHMARK_PARAM(BENCHFUN(erase), 1024); + diff --git a/folly/test/ForeachTest.cpp b/folly/test/ForeachTest.cpp new file mode 100644 index 00000000..700e87c0 --- /dev/null +++ b/folly/test/ForeachTest.cpp @@ -0,0 +1,269 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "folly/Foreach.h"
+
+#include "folly/Benchmark.h"
+#include <gtest/gtest.h>
+#include <map>
+#include <string>
+#include <vector>
+#include <list>
+
+using namespace folly;
+using namespace folly::detail;
+
+TEST(Foreach, ForEachKV) {
+  std::map<std::string, int> testMap;
+  testMap["abc"] = 1;
+  testMap["def"] = 2;
+  std::string keys = "";
+  int values = 0;
+  int numEntries = 0;
+  FOR_EACH_KV (key, value, testMap) {
+    keys += key;
+    values += value;
+    ++numEntries;
+  }
+  EXPECT_EQ("abcdef", keys);
+  EXPECT_EQ(3, values);
+  EXPECT_EQ(2, numEntries);
+}
+
+TEST(Foreach, ForEachKVBreak) {
+  std::map<std::string, int> testMap;
+  testMap["abc"] = 1;
+  testMap["def"] = 2;
+  std::string keys = "";
+  int values = 0;
+  int numEntries = 0;
+  FOR_EACH_KV (key, value, testMap) {
+    keys += key;
+    values += value;
+    ++numEntries;
+    break;
+  }
+  EXPECT_EQ("abc", keys);
+  EXPECT_EQ(1, values);
+  EXPECT_EQ(1, numEntries);
+}
+
+TEST(Foreach, ForEachKvWithMultiMap) {
+  std::multimap<std::string, int> testMap;
+  testMap.insert(std::make_pair("abc", 1));
+  testMap.insert(std::make_pair("abc", 2));
+  testMap.insert(std::make_pair("def", 3));
+  std::string keys = "";
+  int values = 0;
+  int numEntries = 0;
+  FOR_EACH_KV (key, value, testMap) {
+    keys += key;
+    values += value;
+    ++numEntries;
+  }
+  EXPECT_EQ("abcabcdef", keys);
+  EXPECT_EQ(6, values);
+  EXPECT_EQ(3, numEntries);
+}
+
+TEST(Foreach, ForEachEnumerate) {
+  std::vector<int> vv;
+  int sumAA = 0;
+  int sumIter = 0;
+  int numIterations = 0;
+  FOR_EACH_ENUMERATE(aa, iter, vv) {
+    sumAA += aa;
+    sumIter += *iter;
+    ++numIterations;
+  }
+  EXPECT_EQ(sumAA, 0);
+  EXPECT_EQ(sumIter, 0);
+  EXPECT_EQ(numIterations, 0);
+
+  vv.push_back(1);
+  vv.push_back(3);
+  vv.push_back(5);
+  FOR_EACH_ENUMERATE(aa, iter, vv) {
+    sumAA += aa;
+    sumIter += *iter;
+    ++numIterations;
+  }
+  EXPECT_EQ(sumAA, 3);   // 0 + 1 + 2
+  EXPECT_EQ(sumIter, 9); // 1 + 3 + 5
+  EXPECT_EQ(numIterations, 3);
+}
+
+TEST(Foreach, ForEachEnumerateBreak) {
+  std::vector<int> vv;
+  int sumAA = 0;
+  int sumIter = 0;
+  int numIterations = 0;
+  vv.push_back(1);
+  vv.push_back(2);
+  vv.push_back(4);
+  vv.push_back(8);
+  FOR_EACH_ENUMERATE(aa, iter, vv) {
+    sumAA += aa;
+    sumIter += *iter;
+    ++numIterations;
+    if (aa == 1) break;
+  }
+  EXPECT_EQ(sumAA, 1);   // 0 + 1
+  EXPECT_EQ(sumIter, 3); // 1 + 2
+  EXPECT_EQ(numIterations, 2);
+}
+
+TEST(Foreach, ForEachRangeR) {
+  int sum = 0;
+
+  FOR_EACH_RANGE_R (i, 0, 0) {
+    sum += i;
+  }
+  EXPECT_EQ(0, sum);
+
+  FOR_EACH_RANGE_R (i, 0, -1) {
+    sum += i;
+  }
+  EXPECT_EQ(0, sum);
+
+  FOR_EACH_RANGE_R (i, 0, 5) {
+    sum += i;
+  }
+  EXPECT_EQ(10, sum);
+
+  std::list<int> lst = { 0, 1, 2, 3, 4 };
+  sum = 0;
+  FOR_EACH_RANGE_R (i, lst.begin(), lst.end()) {
+    sum += *i;
+  }
+  EXPECT_EQ(10, sum);
+}
+
+// Benchmarks:
+// 1. Benchmark iterating through the map with FOR_EACH, assigning
+//    iter->first and iter->second to local vars inside the loop.
+// 2. Benchmark iterating through the map with FOR_EACH, using iter->first
+//    and iter->second as is, without assigning to local variables.
+// 3. Use a FOR_EACH_KV loop to iterate through the map.
+
+std::map<int, std::string> bmMap; // For use in benchmarks below.
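+
+// An added illustrative cross-check (a minimal sketch, not part of the
+// original suite; the test name is ours): a manual iterator loop and
+// FOR_EACH_KV should visit the same entries and produce the same
+// aggregate, which is exactly the equivalence the benchmarks below time.
+TEST(Foreach, ForEachKVLoopEquivalence) {
+  std::map<int, int> m;
+  for (int i = 0; i < 10; ++i) {
+    m[i] = 2 * i;
+  }
+  int manualSum = 0;
+  for (auto iter = m.begin(); iter != m.end(); ++iter) {
+    manualSum += iter->first + iter->second;
+  }
+  int macroSum = 0;
+  FOR_EACH_KV (k, v, m) {
+    macroSum += k + v;
+  }
+  EXPECT_EQ(manualSum, macroSum);
+}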
+ +void setupBenchmark(int iters) { + bmMap.clear(); + for (int i = 0; i < iters; ++i) { + bmMap[i] = "teststring"; + } +} + +BENCHMARK(ForEachKVNoMacroAssign, iters) { + int sumKeys = 0; + std::string sumValues; + + BENCHMARK_SUSPEND { + setupBenchmark(iters); + int sumKeys = 0; + std::string sumValues = ""; + } + + FOR_EACH (iter, bmMap) { + const int k = iter->first; + const std::string v = iter->second; + sumKeys += k; + sumValues += v; + } +} + +BENCHMARK(ForEachKVNoMacroNoAssign, iters) { + int sumKeys = 0; + std::string sumValues; + + BENCHMARK_SUSPEND { + setupBenchmark(iters); + } + + FOR_EACH (iter, bmMap) { + sumKeys += iter->first; + sumValues += iter->second; + } +} + +BENCHMARK(ManualLoopNoAssign, iters) { + BENCHMARK_SUSPEND { + setupBenchmark(iters); + } + int sumKeys = 0; + std::string sumValues; + + for (auto iter = bmMap.begin(); iter != bmMap.end(); ++iter) { + sumKeys += iter->first; + sumValues += iter->second; + } +} + +BENCHMARK(ForEachKVMacro, iters) { + BENCHMARK_SUSPEND { + setupBenchmark(iters); + } + int sumKeys = 0; + std::string sumValues; + + FOR_EACH_KV (k, v, bmMap) { + sumKeys += k; + sumValues += v; + } +} + +BENCHMARK(ForEachManual, iters) { + int sum = 1; + for (auto i = 1; i < iters; ++i) { + sum *= i; + } + doNotOptimizeAway(sum); +} + +BENCHMARK(ForEachRange, iters) { + int sum = 1; + FOR_EACH_RANGE (i, 1, iters) { + sum *= i; + } + doNotOptimizeAway(sum); +} + +BENCHMARK(ForEachDescendingManual, iters) { + int sum = 1; + for (auto i = iters; i-- > 1; ) { + sum *= i; + } + doNotOptimizeAway(sum); +} + +BENCHMARK(ForEachRangeR, iters) { + int sum = 1; + FOR_EACH_RANGE_R (i, 1, iters) { + sum *= i; + } + doNotOptimizeAway(sum); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + auto r = RUN_ALL_TESTS(); + if (r) { + return r; + } + runBenchmarks(); + return 0; +} diff --git a/folly/test/FormatTest.cpp b/folly/test/FormatTest.cpp new file mode 100644 index 00000000..a1ca6a29 --- /dev/null +++ b/folly/test/FormatTest.cpp @@ -0,0 +1,410 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Format.h" + +#include +#include + +#include "folly/FBVector.h" +#include "folly/Benchmark.h" +#include "folly/dynamic.h" +#include "folly/json.h" + +using namespace folly; + +template +std::string fstr(StringPiece fmt, Args&&... 
args) { + return format(fmt, std::forward(args)...).str(); +} + +template +std::string vstr(StringPiece fmt, const C& c) { + return vformat(fmt, c).str(); +} + +template +void compareOctal(Uint u) { + char buf1[detail::kMaxOctalLength + 1]; + buf1[detail::kMaxOctalLength] = '\0'; + char* p = buf1 + detail::uintToOctal(buf1, detail::kMaxOctalLength, u); + + char buf2[detail::kMaxOctalLength + 1]; + sprintf(buf2, "%jo", static_cast(u)); + + EXPECT_EQ(std::string(buf2), std::string(p)); +} + +template +void compareHex(Uint u) { + char buf1[detail::kMaxHexLength + 1]; + buf1[detail::kMaxHexLength] = '\0'; + char* p = buf1 + detail::uintToHexLower(buf1, detail::kMaxHexLength, u); + + char buf2[detail::kMaxHexLength + 1]; + sprintf(buf2, "%jx", static_cast(u)); + + EXPECT_EQ(std::string(buf2), std::string(p)); +} + +template +void compareBinary(Uint u) { + char buf[detail::kMaxBinaryLength + 1]; + buf[detail::kMaxBinaryLength] = '\0'; + char* p = buf + detail::uintToBinary(buf, detail::kMaxBinaryLength, u); + + std::string repr; + if (u == 0) { + repr = '0'; + } else { + std::string tmp; + for (; u; u >>= 1) { + tmp.push_back(u & 1 ? '1' : '0'); + } + repr.assign(tmp.rbegin(), tmp.rend()); + } + + EXPECT_EQ(repr, std::string(p)); +} + +TEST(Format, uintToOctal) { + for (unsigned i = 0; i < (1u << 16) + 2; i++) { + compareOctal(i); + } +} + +TEST(Format, uintToHex) { + for (unsigned i = 0; i < (1u << 16) + 2; i++) { + compareHex(i); + } +} + +TEST(Format, uintToBinary) { + for (unsigned i = 0; i < (1u << 16) + 2; i++) { + compareBinary(i); + } +} + +TEST(Format, Simple) { + EXPECT_EQ("hello", fstr("hello")); + EXPECT_EQ("42", fstr("{}", 42)); + EXPECT_EQ("42 42", fstr("{0} {0}", 42)); + EXPECT_EQ("00042 23 42", fstr("{0:05} {1:3} {0:4}", 42, 23)); + EXPECT_EQ("hello world hello 42", + fstr("{0} {1} {0} {2}", "hello", "world", 42)); + EXPECT_EQ("XXhelloXX", fstr("{:X^9}", "hello")); + EXPECT_EQ("XXX42XXXX", fstr("{:X^9}", 42)); + EXPECT_EQ("-0xYYYY2a", fstr("{:Y=#9x}", -42)); + EXPECT_EQ("*", fstr("{}", '*')); + EXPECT_EQ("42", fstr("{}", 42)); + EXPECT_EQ("0042", fstr("{:04}", 42)); + + EXPECT_EQ("hello ", fstr("{:7}", "hello")); + EXPECT_EQ("hello ", fstr("{:<7}", "hello")); + EXPECT_EQ(" hello", fstr("{:>7}", "hello")); + + std::vector v1 {10, 20, 30}; + EXPECT_EQ("0020", fstr("{0[1]:04}", v1)); + EXPECT_EQ("0020", vstr("{1:04}", v1)); + EXPECT_EQ("10 20", vstr("{} {}", v1)); + + const std::vector v2 = v1; + EXPECT_EQ("0020", fstr("{0[1]:04}", v2)); + EXPECT_EQ("0020", vstr("{1:04}", v2)); + + const int p[] = {10, 20, 30}; + const int* q = p; + EXPECT_EQ("0020", fstr("{0[1]:04}", p)); + EXPECT_EQ("0020", vstr("{1:04}", p)); + EXPECT_EQ("0020", fstr("{0[1]:04}", q)); + EXPECT_EQ("0020", vstr("{1:04}", q)); + + EXPECT_EQ("0x", fstr("{}", p).substr(0, 2)); + EXPECT_EQ("10", vstr("{}", p)); + EXPECT_EQ("0x", fstr("{}", q).substr(0, 2)); + EXPECT_EQ("10", vstr("{}", q)); + q = nullptr; + EXPECT_EQ("(null)", fstr("{}", q)); + + std::map m { {10, "hello"}, {20, "world"} }; + EXPECT_EQ("worldXX", fstr("{[20]:X<7}", m)); + EXPECT_EQ("worldXX", vstr("{20:X<7}", m)); + + std::map m2 { {"hello", "world"} }; + EXPECT_EQ("worldXX", fstr("{[hello]:X<7}", m2)); + EXPECT_EQ("worldXX", vstr("{hello:X<7}", m2)); + + // Test indexing in strings + EXPECT_EQ("61 62", fstr("{0[0]:x} {0[1]:x}", "abcde")); + EXPECT_EQ("61 62", vstr("{0:x} {1:x}", "abcde")); + EXPECT_EQ("61 62", fstr("{0[0]:x} {0[1]:x}", std::string("abcde"))); + EXPECT_EQ("61 62", vstr("{0:x} {1:x}", std::string("abcde"))); + + // Test booleans + 
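+  // (the default presentation spells bools out; ":d" requests integral
+  // presentation, printing 1/0 instead)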
EXPECT_EQ("true", fstr("{}", true)); + EXPECT_EQ("1", fstr("{:d}", true)); + EXPECT_EQ("false", fstr("{}", false)); + EXPECT_EQ("0", fstr("{:d}", false)); + + // Test pairs + { + std::pair p {42, "hello"}; + EXPECT_EQ(" 42 hello ", fstr("{0[0]:6} {0[1]:6}", p)); + EXPECT_EQ(" 42 hello ", vstr("{:6} {:6}", p)); + } + + // Test tuples + { + std::tuple t { 42, "hello", 23 }; + EXPECT_EQ(" 42 hello 23", fstr("{0[0]:6} {0[1]:6} {0[2]:6}", t)); + EXPECT_EQ(" 42 hello 23", vstr("{:6} {:6} {:6}", t)); + } + + // Test writing to stream + std::ostringstream os; + os << format("{} {}", 42, 23); + EXPECT_EQ("42 23", os.str()); + + // Test appending to string + std::string s; + format(&s, "{} {}", 42, 23); + format(&s, " hello {:X<7}", "world"); + EXPECT_EQ("42 23 hello worldXX", s); +} + +namespace { +void testFloat(const char* fmt, double val) { + char buf[100]; + sprintf(buf, to("%", fmt).c_str(), val); + + EXPECT_EQ(buf, fstr(to("{:", fmt, "}"), val)); +} +} // namespace + +TEST(Format, Float) { + double d = 1; + EXPECT_EQ("1", fstr("{}", 1.0)); + EXPECT_EQ("0.1", fstr("{}", 0.1)); + EXPECT_EQ("0.01", fstr("{}", 0.01)); + EXPECT_EQ("0.001", fstr("{}", 0.001)); + EXPECT_EQ("0.0001", fstr("{}", 0.0001)); + EXPECT_EQ("1e-5", fstr("{}", 0.00001)); + EXPECT_EQ("1e-6", fstr("{}", 0.000001)); + + EXPECT_EQ("10", fstr("{}", 10.0)); + EXPECT_EQ("100", fstr("{}", 100.0)); + EXPECT_EQ("1000", fstr("{}", 1000.0)); + EXPECT_EQ("10000", fstr("{}", 10000.0)); + EXPECT_EQ("100000", fstr("{}", 100000.0)); + EXPECT_EQ("1e+6", fstr("{}", 1000000.0)); + EXPECT_EQ("1e+7", fstr("{}", 10000000.0)); + + EXPECT_EQ("1.00", fstr("{:.2f}", 1.0)); + EXPECT_EQ("0.10", fstr("{:.2f}", 0.1)); + EXPECT_EQ("0.01", fstr("{:.2f}", 0.01)); + EXPECT_EQ("0.00", fstr("{:.2f}", 0.001)); +} + +TEST(Format, MultiLevel) { + std::vector> v = { + { + {"hello", "world"}, + }, + }; + + EXPECT_EQ("world", fstr("{[0.hello]}", v)); +} + +TEST(Format, dynamic) { + auto dyn = parseJson( + "{\n" + " \"hello\": \"world\",\n" + " \"x\": [20, 30],\n" + " \"y\": {\"a\" : 42}\n" + "}"); + + EXPECT_EQ("world", fstr("{0[hello]}", dyn)); + EXPECT_EQ("20", fstr("{0[x.0]}", dyn)); + EXPECT_EQ("42", fstr("{0[y.a]}", dyn)); + + EXPECT_EQ("(null)", fstr("{}", dynamic(nullptr))); +} + +namespace { + +struct KeyValue { + std::string key; + int value; +}; + +} // namespace + +namespace folly { + +template <> class FormatValue { + public: + explicit FormatValue(const KeyValue& kv) : kv_(kv) { } + + template + void format(FormatArg& arg, FormatCallback& cb) const { + format_value::formatFormatter( + folly::format("", kv_.key, kv_.value), + arg, cb); + } + + private: + const KeyValue& kv_; +}; + +} // namespace + +TEST(Format, Custom) { + KeyValue kv { "hello", 42 }; + + EXPECT_EQ("", fstr("{}", kv)); + EXPECT_EQ("", fstr("{:10}", kv)); + EXPECT_EQ("XX", fstr("{:X<23}", kv)); + EXPECT_EQ("XX", fstr("{:X>23}", kv)); +} + +namespace { + +char bigBuf[300]; + +} // namespace + +BENCHMARK(octal_sprintf, iters) { + while (iters--) { + sprintf(bigBuf, "%o", static_cast(iters)); + } +} + +BENCHMARK_RELATIVE(octal_uintToOctal, iters) { + while (iters--) { + detail::uintToOctal(bigBuf, detail::kMaxOctalLength, + static_cast(iters)); + } +} + +BENCHMARK_DRAW_LINE() + +BENCHMARK(hex_sprintf, iters) { + while (iters--) { + sprintf(bigBuf, "%x", static_cast(iters)); + } +} + +BENCHMARK_RELATIVE(hex_uintToHex, iters) { + while (iters--) { + detail::uintToHexLower(bigBuf, detail::kMaxHexLength, + static_cast(iters)); + } +} + +BENCHMARK_DRAW_LINE() + +BENCHMARK(intAppend_sprintf) { + 
fbstring out; + for (int i = -1000; i < 1000; i++) { + sprintf(bigBuf, "%d", i); + out.append(bigBuf); + } +} + +BENCHMARK_RELATIVE(intAppend_to) { + fbstring out; + for (int i = -1000; i < 1000; i++) { + toAppend(i, &out); + } +} + +BENCHMARK_RELATIVE(intAppend_format) { + fbstring out; + for (int i = -1000; i < 1000; i++) { + format(&out, "{}", i); + } +} + +BENCHMARK_DRAW_LINE() + +BENCHMARK(bigFormat_sprintf, iters) { + while (iters--) { + for (int i = -100; i < 100; i++) { + sprintf(bigBuf, + "%d %d %d %d %d" + "%d %d %d %d %d" + "%d %d %d %d %d" + "%d %d %d %d %d", + i, i+1, i+2, i+3, i+4, + i+5, i+6, i+7, i+8, i+9, + i+10, i+11, i+12, i+13, i+14, + i+15, i+16, i+17, i+18, i+19); + } + } +} + +BENCHMARK_RELATIVE(bigFormat_format, iters) { + char* p; + auto writeToBuf = [&p] (StringPiece sp) mutable { + memcpy(p, sp.data(), sp.size()); + p += sp.size(); + }; + + while (iters--) { + for (int i = -100; i < 100; i++) { + p = bigBuf; + format("{} {} {} {} {}" + "{} {} {} {} {}" + "{} {} {} {} {}" + "{} {} {} {} {}", + i, i+1, i+2, i+3, i+4, + i+5, i+6, i+7, i+8, i+9, + i+10, i+11, i+12, i+13, i+14, + i+15, i+16, i+17, i+18, i+19)(writeToBuf); + } + } +} + +// Benchmark results on my dev server (dual-CPU Xeon L5520 @ 2.7GHz) +// +// ============================================================================ +// folly/test/FormatTest.cpp relative ns/iter iters/s +// ============================================================================ +// octal_sprintf 100.57 9.94M +// octal_uintToOctal 2599.47% 3.87 258.46M +// ---------------------------------------------------------------------------- +// hex_sprintf 100.13 9.99M +// hex_uintToHex 3331.75% 3.01 332.73M +// ---------------------------------------------------------------------------- +// intAppend_sprintf 406.07K 2.46K +// intAppend_to 166.03% 244.58K 4.09K +// intAppend_format 147.57% 275.17K 3.63K +// ---------------------------------------------------------------------------- +// bigFormat_sprintf 255.40K 3.92K +// bigFormat_format 102.18% 249.94K 4.00K +// ============================================================================ + +int main(int argc, char *argv[]) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret) { + runBenchmarksOnFlag(); + } + return ret; +} + diff --git a/folly/test/GroupVarintTest.cpp b/folly/test/GroupVarintTest.cpp new file mode 100644 index 00000000..cdc693bc --- /dev/null +++ b/folly/test/GroupVarintTest.cpp @@ -0,0 +1,261 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "folly/GroupVarint.h" + +#include + +using namespace folly; + +namespace { + +class StringAppender { + public: + /* implicit */ StringAppender(std::string& s) : s_(s) { } + void operator()(StringPiece sp) { + s_.append(sp.data(), sp.size()); + } + private: + std::string& s_; +}; + +typedef GroupVarintEncoder GroupVarint32Encoder; +typedef GroupVarintEncoder GroupVarint64Encoder; +typedef GroupVarintDecoder GroupVarint32Decoder; +typedef GroupVarintDecoder GroupVarint64Decoder; + +// Expected bytes follow, terminate with -1 +void testGroupVarint32(uint32_t a, uint32_t b, uint32_t c, uint32_t d, ...) { + va_list ap; + va_start(ap, d); + std::vector expectedBytes; + int byte; + while ((byte = va_arg(ap, int)) != -1) { + expectedBytes.push_back(byte); + } + va_end(ap); + + size_t size = GroupVarint32::size(a, b, c, d); + EXPECT_EQ(expectedBytes.size(), size); + + std::vector foundBytes; + foundBytes.resize(size + 4); + char* start = &(foundBytes.front()); + char* p = GroupVarint32::encode(start, a, b, c, d); + EXPECT_EQ((void*)(start + size), (void*)p); + + for (size_t i = 0; i < size; i++) { + EXPECT_EQ(0xff & expectedBytes[i], 0xff & foundBytes[i]); + } + + // Test decoding + EXPECT_EQ(size, GroupVarint32::encodedSize(start)); + + uint32_t fa, fb, fc, fd; + const char* r = GroupVarint32::decode(start, &fa, &fb, &fc, &fd); + EXPECT_EQ((void*)(start + size), (void*)r); + + EXPECT_EQ(a, fa); + EXPECT_EQ(b, fb); + EXPECT_EQ(c, fc); + EXPECT_EQ(d, fd); +} + +void testGroupVarint64(uint64_t a, uint64_t b, uint64_t c, uint64_t d, + uint64_t e, ...) { + va_list ap; + va_start(ap, e); + std::vector expectedBytes; + int byte; + while ((byte = va_arg(ap, int)) != -1) { + expectedBytes.push_back(byte); + } + va_end(ap); + + size_t size = GroupVarint64::size(a, b, c, d, e); + EXPECT_EQ(expectedBytes.size(), size); + + std::vector foundBytes; + foundBytes.resize(size + 8); + char* start = &(foundBytes.front()); + char* p = GroupVarint64::encode(start, a, b, c, d, e); + EXPECT_EQ((void*)(start + size), (void*)p); + + for (size_t i = 0; i < size; i++) { + EXPECT_EQ(0xff & expectedBytes[i], 0xff & foundBytes[i]); + } + + // Test decoding + EXPECT_EQ(size, GroupVarint64::encodedSize(start)); + + uint64_t fa, fb, fc, fd, fe; + const char* r = GroupVarint64::decode(start, &fa, &fb, &fc, &fd, &fe); + EXPECT_EQ((void*)(start + size), (void*)r); + + EXPECT_EQ(a, fa); + EXPECT_EQ(b, fb); + EXPECT_EQ(c, fc); + EXPECT_EQ(d, fd); + EXPECT_EQ(e, fe); +} + +} // namespace + +TEST(GroupVarint, GroupVarint32) { + EXPECT_EQ(0, GroupVarint32::maxSize(0)); + EXPECT_EQ(5, GroupVarint32::maxSize(1)); + EXPECT_EQ(9, GroupVarint32::maxSize(2)); + EXPECT_EQ(13, GroupVarint32::maxSize(3)); + EXPECT_EQ(17, GroupVarint32::maxSize(4)); + EXPECT_EQ(22, GroupVarint32::maxSize(5)); + EXPECT_EQ(26, GroupVarint32::maxSize(6)); + testGroupVarint32(0, 0, 0, 0, + 0, 0, 0, 0, 0, -1); + testGroupVarint32(1, 2, 3, 4, + 0, 1, 2, 3, 4, -1); + testGroupVarint32(1 << 8, (2 << 16) + 3, (4 << 24) + (5 << 8) + 6, 7, + 0x39, 0, 1, 3, 0, 2, 6, 5, 0, 4, 7, -1); +} + +TEST(GroupVarint, GroupVarint64) { + EXPECT_EQ(0, GroupVarint64::maxSize(0)); + EXPECT_EQ(10, GroupVarint64::maxSize(1)); + EXPECT_EQ(18, GroupVarint64::maxSize(2)); + EXPECT_EQ(26, GroupVarint64::maxSize(3)); + EXPECT_EQ(34, GroupVarint64::maxSize(4)); + EXPECT_EQ(42, GroupVarint64::maxSize(5)); + EXPECT_EQ(52, GroupVarint64::maxSize(6)); + testGroupVarint64(0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, -1); + testGroupVarint64(1, 2, 3, 4, 5, + 0, 0, 1, 2, 3, 4, 5, -1); + 
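+  // Each group starts with two header bytes holding five 3-bit
+  // (length - 1) fields, followed by the values in little-endian order,
+  // each occupying the announced 1..8 bytes; that layout is what the
+  // expected byte lists here spell out, and why maxSize(1) == 2 + 8.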
testGroupVarint64(1 << 8, (2 << 16) + 3, (4 << 24) + (5 << 8) + 6, + (7ULL << 32) + (8 << 16), + (9ULL << 56) + (10ULL << 40) + 11, + 0xd1, 0x78, + 0, 1, + 3, 0, 2, + 6, 5, 0, 4, + 0, 0, 8, 0, 7, + 11, 0, 0, 0, 0, 10, 0, 9, + -1); +} + +TEST(GroupVarint, GroupVarintEncoder) { + std::string s; + { + GroupVarint32Encoder gv(s); + gv.add(0); + gv.finish(); + } + EXPECT_EQ(2, s.size()); + EXPECT_EQ(std::string("\x00\x00", 2), s); + s.clear(); + { + GroupVarint32Encoder gv(s); + gv.add(1); + gv.add(2); + gv.add(3); + gv.add(4); + gv.finish(); + } + EXPECT_EQ(5, s.size()); + EXPECT_EQ(std::string("\x00\x01\x02\x03\x04", 5), s); +} + + +TEST(GroupVarint, GroupVarintDecoder) { + // Make sure we don't read out of bounds + std::string padding(17, 'X'); + + { + std::string s("\x00\x00", 2); + s += padding; + StringPiece p(s.data(), 2); + + GroupVarint32Decoder gv(p); + uint32_t v; + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(0, v); + EXPECT_FALSE(gv.next(&v)); + EXPECT_TRUE(gv.rest().empty()); + } + + { + std::string s("\x00\x01\x02\x03\x04\x01\x02\x03\x04", 9); + s += padding; + StringPiece p(s.data(), 9); + + GroupVarint32Decoder gv(p); + uint32_t v; + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(1, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(2, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(3, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(4, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(0x0302, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(4, v); + EXPECT_FALSE(gv.next(&v)); + EXPECT_TRUE(gv.rest().empty()); + } + + { + // Limit max count when reading a full block + std::string s("\x00\x01\x02\x03\x04\x01\x02\x03\x04", 9); + s += padding; + StringPiece p(s.data(), 9); + + GroupVarint32Decoder gv(p, 3); + uint32_t v; + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(1, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(2, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(3, v); + EXPECT_FALSE(gv.next(&v)); + EXPECT_EQ(std::string("\x04\x01\x02\x03\x04", 5), gv.rest().toString()); + } + + { + // Limit max count when reading a partial block + std::string s("\x00\x01\x02\x03\x04\x01\x02\x03\x04", 9); + s += padding; + StringPiece p(s.data(), 9); + + GroupVarint32Decoder gv(p, 5); + uint32_t v; + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(1, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(2, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(3, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(4, v); + EXPECT_TRUE(gv.next(&v)); + EXPECT_EQ(0x0302, v); + EXPECT_FALSE(gv.next(&v)); + EXPECT_EQ(std::string("\x04", 1), gv.rest().toString()); + } +} + diff --git a/folly/test/HashTest.cpp b/folly/test/HashTest.cpp new file mode 100644 index 00000000..6750eab7 --- /dev/null +++ b/folly/test/HashTest.cpp @@ -0,0 +1,137 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "folly/Hash.h"
+#include "folly/MapUtil.h"
+#include <gtest/gtest.h>
+#include <stdint.h>
+#include <string.h>
+#include <unordered_map>
+
+using namespace folly::hash;
+
+TEST(Hash, Fnv32) {
+  const char* s1 = "hello, world!";
+  const uint32_t s1_res = 3180823791ul;
+  EXPECT_EQ(fnv32(s1), s1_res);
+  EXPECT_EQ(fnv32(s1), fnv32_buf(s1, strlen(s1)));
+
+  const char* s2 = "monkeys! m0nk3yz! ev3ry \\/\\/here~~~~";
+  const uint32_t s2_res = 194407565ul;
+  EXPECT_EQ(fnv32(s2), s2_res);
+  EXPECT_EQ(fnv32(s2), fnv32_buf(s2, strlen(s2)));
+
+  const char* s3 = "";
+  const uint32_t s3_res = 216613626ul;
+  EXPECT_EQ(fnv32(s3), s3_res);
+  EXPECT_EQ(fnv32(s3), fnv32_buf(s3, strlen(s3)));
+}
+
+TEST(Hash, Fnv64) {
+  const char* s1 = "hello, world!";
+  const uint64_t s1_res = 13991426986746681734ULL;
+  EXPECT_EQ(fnv64(s1), s1_res);
+  EXPECT_EQ(fnv64(s1), fnv64_buf(s1, strlen(s1)));
+
+  const char* s2 = "monkeys! m0nk3yz! ev3ry \\/\\/here~~~~";
+  const uint64_t s2_res = 6091394665637302478ULL;
+  EXPECT_EQ(fnv64(s2), s2_res);
+  EXPECT_EQ(fnv64(s2), fnv64_buf(s2, strlen(s2)));
+
+  const char* s3 = "";
+  const uint64_t s3_res = 14695981039346656037ULL;
+  EXPECT_EQ(fnv64(s3), s3_res);
+  EXPECT_EQ(fnv64(s3), fnv64_buf(s3, strlen(s3)));
+
+  // note: Use fnv64_buf to make a single hash value from multiple
+  // fields/datatypes.
+  const char* t4_a = "E Pluribus";
+  int64_t t4_b = 0xF1E2D3C4B5A69788;
+  int32_t t4_c = 0xAB12CD34;
+  const char* t4_d = "Unum";
+  uint64_t t4_res = 15571330457339273965ULL;
+  uint64_t t4_hash1 = fnv64_buf(t4_a,
+                                strlen(t4_a));
+  uint64_t t4_hash2 = fnv64_buf(reinterpret_cast<const char*>(&t4_b),
+                                sizeof(int64_t),
+                                t4_hash1);
+  uint64_t t4_hash3 = fnv64_buf(reinterpret_cast<const char*>(&t4_c),
+                                sizeof(int32_t),
+                                t4_hash2);
+  uint64_t t4_hash4 = fnv64_buf(t4_d,
+                                strlen(t4_d),
+                                t4_hash3);
+  EXPECT_EQ(t4_hash4, t4_res);
+  // note: These checks are probabilistic, not deterministic, but c'mon.
+  // These hash values should be different, or something's not working.
+  EXPECT_NE(t4_hash1, t4_hash4);
+  EXPECT_NE(t4_hash2, t4_hash4);
+  EXPECT_NE(t4_hash3, t4_hash4);
+}
+
+TEST(Hash, Hsieh32) {
+  const char* s1 = "hello, world!";
+  const uint32_t s1_res = 2918802987ul;
+  EXPECT_EQ(hsieh_hash32(s1), s1_res);
+  EXPECT_EQ(hsieh_hash32(s1), hsieh_hash32_buf(s1, strlen(s1)));
+
+  const char* s2 = "monkeys! m0nk3yz! ev3ry \\/\\/here~~~~";
+  const uint32_t s2_res = 47373213ul;
+  EXPECT_EQ(hsieh_hash32(s2), s2_res);
+  EXPECT_EQ(hsieh_hash32(s2), hsieh_hash32_buf(s2, strlen(s2)));
+
+  const char* s3 = "";
+  const uint32_t s3_res = 0;
+  EXPECT_EQ(hsieh_hash32(s3), s3_res);
+  EXPECT_EQ(hsieh_hash32(s3), hsieh_hash32_buf(s3, strlen(s3)));
+}
+
+TEST(Hash, TWang_Mix64) {
+  uint64_t i1 = 0x78a87873e2d31dafULL;
+  uint64_t i1_res = 3389151152926383528ULL;
+  EXPECT_EQ(twang_mix64(i1), i1_res);
+
+  uint64_t i2 = 0x0123456789abcdefULL;
+  uint64_t i2_res = 3061460455458984563ull;
+  EXPECT_EQ(twang_mix64(i2), i2_res);
+}
+
+TEST(Hash, TWang_32From64) {
+  uint64_t i1 = 0x78a87873e2d31dafULL;
+  uint32_t i1_res = 1525586863ul;
+  EXPECT_EQ(twang_32from64(i1), i1_res);
+
+  uint64_t i2 = 0x0123456789abcdefULL;
+  uint32_t i2_res = 2918899159ul;
+  EXPECT_EQ(twang_32from64(i2), i2_res);
+}
+
+TEST(Hash, Jenkins_Rev_Mix32) {
+  uint32_t i1 = 3805486511ul;
+  uint32_t i1_res = 381808021ul;
+  EXPECT_EQ(jenkins_rev_mix32(i1), i1_res);
+
+  uint32_t i2 = 2309737967ul;
+  uint32_t i2_res = 1834777923ul;
+  EXPECT_EQ(jenkins_rev_mix32(i2), i2_res);
+}
+
+TEST(Hash, hasher) {
+  // Basically just confirms that things compile ok.
+  std::unordered_map<int32_t, int32_t, folly::hasher<int32_t>> m;
+  m.insert(std::make_pair(4, 5));
+  EXPECT_EQ(get_default(m, 4), 5);
+}
diff --git a/folly/test/HistogramTest.cpp b/folly/test/HistogramTest.cpp
new file mode 100644
index 00000000..a5cfd951
--- /dev/null
+++ b/folly/test/HistogramTest.cpp
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/Histogram.h"
+
+#include <gtest/gtest.h>
+#include <limits>
+
+using folly::Histogram;
+
+// Insert 100 evenly distributed values into a histogram with 100 buckets
+TEST(Histogram, Test100) {
+  Histogram<int64_t> h(1, 0, 100);
+
+  for (unsigned int n = 0; n < 100; ++n) {
+    h.addValue(n);
+  }
+
+  // 100 buckets, plus 1 for below min, and 1 for above max
+  EXPECT_EQ(h.getNumBuckets(), 102);
+
+  double epsilon = 1e-6;
+  for (unsigned int n = 0; n <= 100; ++n) {
+    double pct = n / 100.0;
+
+    // Floating point arithmetic isn't 100% accurate, and (n / 100) lands
+    // exactly on a bucket boundary. Add epsilon to make sure we fall into
+    // the upper bucket.
+    if (n < 100) {
+      double lowPct = -1.0;
+      double highPct = -1.0;
+      unsigned int bucketIdx = h.getPercentileBucketIdx(pct + epsilon,
+                                                        &lowPct, &highPct);
+      EXPECT_EQ(n + 1, bucketIdx);
+      EXPECT_FLOAT_EQ(n / 100.0, lowPct);
+      EXPECT_FLOAT_EQ((n + 1) / 100.0, highPct);
+    }
+
+    // Also test n - epsilon, to test falling in the lower bucket.
+    if (n > 0) {
+      double lowPct = -1.0;
+      double highPct = -1.0;
+      unsigned int bucketIdx = h.getPercentileBucketIdx(pct - epsilon,
+                                                        &lowPct, &highPct);
+      EXPECT_EQ(n, bucketIdx);
+      EXPECT_FLOAT_EQ((n - 1) / 100.0, lowPct);
+      EXPECT_FLOAT_EQ(n / 100.0, highPct);
+    }
+
+    // Check getPercentileEstimate()
+    EXPECT_EQ(n, h.getPercentileEstimate(pct));
+  }
+}
+
+// Test calling getPercentileBucketIdx() and getPercentileEstimate() on an
+// empty histogram
+TEST(Histogram, TestEmpty) {
+  Histogram<int64_t> h(1, 0, 100);
+
+  for (unsigned int n = 0; n <= 100; ++n) {
+    double pct = n / 100.0;
+
+    double lowPct = -1.0;
+    double highPct = -1.0;
+    unsigned int bucketIdx = h.getPercentileBucketIdx(pct, &lowPct, &highPct);
+    EXPECT_EQ(1, bucketIdx);
+    EXPECT_FLOAT_EQ(0.0, lowPct);
+    EXPECT_FLOAT_EQ(0.0, highPct);
+
+    EXPECT_EQ(0, h.getPercentileEstimate(pct));
+  }
+}
+
+// Test calling getPercentileBucketIdx() and getPercentileEstimate() on a
+// histogram with just a single value.
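+// (With bucketSize 1 over [0, 100), the single value 42 lands in bucket
+// index 43: index 0 is the below-min bucket, so bucket i + 1 covers
+// value i, and every percentile estimate falls in that one occupied
+// bucket.)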
+TEST(Histogram, Test1) { + Histogram h(1, 0, 100); + h.addValue(42); + + for (unsigned int n = 0; n < 100; ++n) { + double pct = n / 100.0; + + double lowPct = -1.0; + double highPct = -1.0; + unsigned int bucketIdx = h.getPercentileBucketIdx(pct, &lowPct, &highPct); + EXPECT_EQ(43, bucketIdx); + EXPECT_FLOAT_EQ(0.0, lowPct); + EXPECT_FLOAT_EQ(1.0, highPct); + + EXPECT_EQ(42, h.getPercentileEstimate(pct)); + } +} + +// Test adding enough numbers to make the sum value overflow in the +// "below min" bucket +TEST(Histogram, TestOverflowMin) { + Histogram h(1, 0, 100); + + for (unsigned int n = 0; n < 9; ++n) { + h.addValue(-0x0fffffffffffffff); + } + + // Compute a percentile estimate. We only added values to the "below min" + // bucket, so this should check that bucket. We're mainly verifying that the + // code doesn't crash here when the bucket average is larger than the max + // value that is supposed to be in the bucket. + int64_t estimate = h.getPercentileEstimate(0.05); + // The code will return the smallest possible value when it detects an + // overflow beyond the minimum value. + EXPECT_EQ(std::numeric_limits::min(), estimate); +} + +// Test adding enough numbers to make the sum value overflow in the +// "above max" bucket +TEST(Histogram, TestOverflowMax) { + Histogram h(1, 0, 100); + + for (unsigned int n = 0; n < 9; ++n) { + h.addValue(0x0fffffffffffffff); + } + + // The code will return the maximum possible value when it detects an + // overflow beyond the max value. + int64_t estimate = h.getPercentileEstimate(0.95); + EXPECT_EQ(std::numeric_limits::max(), estimate); +} + +// Test adding enough numbers to make the sum value overflow in one of the +// normal buckets +TEST(Histogram, TestOverflowBucket) { + Histogram h(0x0100000000000000, 0, 0x1000000000000000); + + for (unsigned int n = 0; n < 9; ++n) { + h.addValue(0x0fffffffffffffff); + } + + // The histogram code should return the bucket midpoint + // when it detects overflow. + int64_t estimate = h.getPercentileEstimate(0.95); + EXPECT_EQ(0x0f80000000000000, estimate); +} + +TEST(Histogram, TestDouble) { + // Insert 100 evenly spaced values into a histogram + Histogram h(100.0, 0.0, 5000.0); + for (double n = 50; n < 5000; n += 100) { + h.addValue(n); + } + EXPECT_EQ(52, h.getNumBuckets()); + EXPECT_EQ(2500.0, h.getPercentileEstimate(0.5)); + EXPECT_EQ(4500.0, h.getPercentileEstimate(0.9)); +} + +// Test where the bucket width is not an even multiple of the histogram range +TEST(Histogram, TestDoubleInexactWidth) { + Histogram h(100.0, 0.0, 4970.0); + for (double n = 50; n < 5000; n += 100) { + h.addValue(n); + } + EXPECT_EQ(52, h.getNumBuckets()); + EXPECT_EQ(2500.0, h.getPercentileEstimate(0.5)); + EXPECT_EQ(4500.0, h.getPercentileEstimate(0.9)); + + EXPECT_EQ(0, h.getBucketByIndex(51).count); + h.addValue(4990); + h.addValue(5100); + EXPECT_EQ(2, h.getBucketByIndex(51).count); + EXPECT_EQ(2600.0, h.getPercentileEstimate(0.5)); +} + +// Test where the bucket width is larger than the histogram range +// (There isn't really much point to defining a histogram this way, +// but we want to ensure that it still works just in case.) 
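+// (With bucket width 100 over the range [0, 7) there is one interior
+// bucket plus the below-min and above-max buckets, hence the expected
+// getNumBuckets() of 3.)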
+TEST(Histogram, TestDoubleWidthTooBig) { + Histogram h(100.0, 0.0, 7.0); + EXPECT_EQ(3, h.getNumBuckets()); + + for (double n = 0; n < 7; n += 1) { + h.addValue(n); + } + EXPECT_EQ(0, h.getBucketByIndex(0).count); + EXPECT_EQ(7, h.getBucketByIndex(1).count); + EXPECT_EQ(0, h.getBucketByIndex(2).count); + EXPECT_EQ(3.0, h.getPercentileEstimate(0.5)); + + h.addValue(-1.0); + EXPECT_EQ(1, h.getBucketByIndex(0).count); + h.addValue(7.5); + EXPECT_EQ(1, h.getBucketByIndex(2).count); + EXPECT_EQ(3.0, h.getPercentileEstimate(0.5)); +} diff --git a/folly/test/JsonTest.cpp b/folly/test/JsonTest.cpp new file mode 100644 index 00000000..ea74a348 --- /dev/null +++ b/folly/test/JsonTest.cpp @@ -0,0 +1,355 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/json.h" +#include +#include +#include +#include +#include +#include +#include "folly/Benchmark.h" + +using folly::dynamic; +using folly::parseJson; +using folly::toJson; + +TEST(Json, Unicode) { + auto val = parseJson("\"I \u2665 UTF-8\""); + EXPECT_EQ("I \u2665 UTF-8", val.asString()); + val = parseJson("\"I \\u2665 UTF-8\""); + EXPECT_EQ("I \u2665 UTF-8", val.asString()); + val = parseJson("\"I \U0001D11E playing in G-clef\""); + EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString()); + + val = parseJson("\"I \\uD834\\uDD1E playing in G-clef\""); + EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString()); +} + +TEST(Json, Parse) { + auto num = parseJson("12"); + EXPECT_TRUE(num.isInt()); + EXPECT_EQ(num, 12); + num = parseJson("12e5"); + EXPECT_TRUE(num.isDouble()); + EXPECT_EQ(num, 12e5); + auto numAs1 = num.asDouble(); + EXPECT_EQ(numAs1, 12e5); + EXPECT_EQ(num, 12e5); + EXPECT_EQ(num, 1200000); + + auto largeNumber = parseJson("4611686018427387904"); + EXPECT_TRUE(largeNumber.isInt()); + EXPECT_EQ(largeNumber, 4611686018427387904L); + + auto negative = parseJson("-123"); + EXPECT_EQ(negative, -123); + + auto bfalse = parseJson("false"); + auto btrue = parseJson("true"); + EXPECT_EQ(bfalse, false); + EXPECT_EQ(btrue, true); + + auto null = parseJson("null"); + EXPECT_TRUE(null == nullptr); + + auto doub1 = parseJson("12.0"); + auto doub2 = parseJson("12e2"); + EXPECT_EQ(doub1, 12.0); + EXPECT_EQ(doub2, 12e2); + EXPECT_EQ(std::numeric_limits::infinity(), + parseJson("Infinity").asDouble()); + EXPECT_EQ(-std::numeric_limits::infinity(), + parseJson("-Infinity").asDouble()); + EXPECT_TRUE(std::isnan(parseJson("NaN").asDouble())); + EXPECT_THROW(parseJson("infinity"), std::runtime_error); + EXPECT_THROW(parseJson("inf"), std::runtime_error); + EXPECT_THROW(parseJson("nan"), std::runtime_error); + + auto array = parseJson( + "[12,false, false , null , [12e4,32, [], 12]]"); + EXPECT_EQ(array.size(), 5); + if (array.size() == 5) { + EXPECT_EQ(boost::prior(array.end())->size(), 4); + } + + bool caught = false; + try { + parseJson("\n[12,\n\nnotvalidjson"); + } catch (const std::exception& e) { + caught = true; + } + EXPECT_TRUE(caught); + + caught = false; + try { + 
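+    // "12e2" already forms a complete number, so the trailing "e2"
+    // leaves garbage after a valid JSON value and the parse must throw.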
parseJson("12e2e2"); + } catch (const std::exception& e) { + caught = true; + } + EXPECT_TRUE(caught); + + caught = false; + try { + parseJson("{\"foo\":12,\"bar\":42} \"something\""); + } catch (const std::exception& e) { + // incomplete parse + caught = true; + } + EXPECT_TRUE(caught); + + dynamic anotherVal = dynamic::object + ("foo", "bar") + ("junk", 12) + ("another", 32.2) + ("a", + { + dynamic::object("a", "b") + ("c", "d"), + 12.5, + "Yo Dawg", + { "heh" }, + nullptr + } + ) + ; + + // Print then parse and get the same thing, hopefully. + auto value = parseJson(toJson(anotherVal)); + EXPECT_EQ(value, anotherVal); + + // Test an object with non-string values. + dynamic something = folly::parseJson( + "{\"old_value\":40,\"changed\":true,\"opened\":false}"); + dynamic expected = dynamic::object + ("old_value", 40) + ("changed", true) + ("opened", false); + EXPECT_EQ(something, expected); +} + +TEST(Json, JavascriptSafe) { + auto badDouble = (1ll << 63ll) + 1; + dynamic badDyn = badDouble; + EXPECT_EQ(folly::toJson(badDouble), folly::to(badDouble)); + folly::json::serialization_opts opts; + opts.javascript_safe = true; + EXPECT_ANY_THROW(folly::json::serialize(badDouble, opts)); + + auto okDouble = 1ll << 63ll; + dynamic okDyn = okDouble; + EXPECT_EQ(folly::toJson(okDouble), folly::to(okDouble)); +} + +TEST(Json, Produce) { + auto value = parseJson(R"( "f\"oo" )"); + EXPECT_EQ(toJson(value), R"("f\"oo")"); + value = parseJson("\"Control code: \001 \002 \x1f\""); + EXPECT_EQ(toJson(value), R"("Control code: \u0001 \u0002 \u001f")"); + + bool caught = false; + try { + dynamic d = dynamic::object; + d["abc"] = "xyz"; + d[42.33] = "asd"; + auto str = toJson(d); + } catch (std::exception const& e) { + // We're not allowed to have non-string keys in json. 
+ caught = true; + } + EXPECT_TRUE(caught); +} + +TEST(Json, JsonNonAsciiEncoding) { + folly::json::serialization_opts opts; + opts.encode_non_ascii = true; + + // simple tests + EXPECT_EQ(folly::json::serialize("\x1f", opts), R"("\u001f")"); + EXPECT_EQ(folly::json::serialize("\xc2\xa2", opts), R"("\u00a2")"); + EXPECT_EQ(folly::json::serialize("\xe2\x82\xac", opts), R"("\u20ac")"); + + // multiple unicode encodings + EXPECT_EQ( + folly::json::serialize("\x1f\xe2\x82\xac", opts), + R"("\u001f\u20ac")"); + EXPECT_EQ( + folly::json::serialize("\x1f\xc2\xa2\xe2\x82\xac", opts), + R"("\u001f\u00a2\u20ac")"); + EXPECT_EQ( + folly::json::serialize("\xc2\x80\xef\xbf\xbf", opts), + R"("\u0080\uffff")"); + EXPECT_EQ( + folly::json::serialize("\xe0\xa0\x80\xdf\xbf", opts), + R"("\u0800\u07ff")"); + + // first possible sequence of a certain length + EXPECT_EQ(folly::json::serialize("\xc2\x80", opts), R"("\u0080")"); + EXPECT_EQ(folly::json::serialize("\xe0\xa0\x80", opts), R"("\u0800")"); + + // last possible sequence of a certain length + EXPECT_EQ(folly::json::serialize("\xdf\xbf", opts), R"("\u07ff")"); + EXPECT_EQ(folly::json::serialize("\xef\xbf\xbf", opts), R"("\uffff")"); + + // other boundary conditions + EXPECT_EQ(folly::json::serialize("\xed\x9f\xbf", opts), R"("\ud7ff")"); + EXPECT_EQ(folly::json::serialize("\xee\x80\x80", opts), R"("\ue000")"); + EXPECT_EQ(folly::json::serialize("\xef\xbf\xbd", opts), R"("\ufffd")"); + + // incomplete sequences + EXPECT_ANY_THROW(folly::json::serialize("a\xed\x9f", opts)); + EXPECT_ANY_THROW(folly::json::serialize("b\xee\x80", opts)); + EXPECT_ANY_THROW(folly::json::serialize("c\xef\xbf", opts)); + + // impossible bytes + EXPECT_ANY_THROW(folly::json::serialize("\xfe", opts)); + EXPECT_ANY_THROW(folly::json::serialize("\xff", opts)); + + // Sample overlong sequences + EXPECT_ANY_THROW(folly::json::serialize("\xc0\xaf", opts)); + EXPECT_ANY_THROW(folly::json::serialize("\xe0\x80\xaf", opts)); + + // Maximum overlong sequences + EXPECT_ANY_THROW(folly::json::serialize("\xc1\xbf", opts)); + EXPECT_ANY_THROW(folly::json::serialize("\x30\x9f\xbf", opts)); + + // illegal code positions + EXPECT_ANY_THROW(folly::json::serialize("\xed\xa0\x80", opts)); + EXPECT_ANY_THROW(folly::json::serialize("\xed\xbf\xbf", opts)); + + // Overlong representation of NUL character + EXPECT_ANY_THROW(folly::json::serialize("\xc0\x80", opts)); + EXPECT_ANY_THROW(folly::json::serialize("\xe0\x80\x80", opts)); + + // Longer than 3 byte encodings + EXPECT_ANY_THROW(folly::json::serialize("\xf4\x8f\xbf\xbf", opts)); + EXPECT_ANY_THROW(folly::json::serialize("\xed\xaf\xbf\xed\xbf\xbf", opts)); +} + +TEST(Json, UTF8Validation) { + folly::json::serialization_opts opts; + opts.validate_utf8 = true; + + // valid utf8 strings + EXPECT_EQ(folly::json::serialize("a\xc2\x80z", opts), R"("a\u00c2\u0080z")"); + EXPECT_EQ( + folly::json::serialize("a\xe0\xa0\x80z", opts), + R"("a\u00e0\u00a0\u0080z")"); + EXPECT_EQ( + folly::json::serialize("a\xe0\xa0\x80m\xc2\x80z", opts), + R"("a\u00e0\u00a0\u0080m\u00c2\u0080z")"); + + // test with invalid utf8 + EXPECT_ANY_THROW(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts)); + EXPECT_ANY_THROW(folly::json::serialize("a\xe0\xa0\x80z\xe0\x80\x80", opts)); +} + +BENCHMARK(jsonSerialize, iters) { + folly::json::serialization_opts opts; + for (int i = 0; i < iters; ++i) { + folly::json::serialize( + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + 
"qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy", + opts); + } +} + +BENCHMARK(jsonSerializeWithNonAsciiEncoding, iters) { + folly::json::serialization_opts opts; + opts.encode_non_ascii = true; + + for (int i = 0; i < iters; ++i) { + folly::json::serialize( + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy", + opts); + } +} + +BENCHMARK(jsonSerializeWithUtf8Validation, iters) { + folly::json::serialization_opts opts; + opts.validate_utf8 = true; + + for (int i = 0; i < iters; ++i) { + folly::json::serialize( + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy" + "qwerty \xc2\x80 \xef\xbf\xbf poiuy", + opts); + } +} + +BENCHMARK(parseSmallStringWithUtf, iters) { + for (int i = 0; i < iters << 4; ++i) { + parseJson("\"I \\u2665 UTF-8 thjasdhkjh blah blah blah\""); + } +} + +BENCHMARK(parseNormalString, iters) { + for (int i = 0; i < iters << 4; ++i) { + parseJson("\"akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk\""); + } +} + +BENCHMARK(parseBigString, iters) { + for (int i = 0; i < iters; ++i) { + parseJson("\"" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "akjhfk jhkjlakjhfk jhkjlakjhfk jhkjl akjhfk" + "\""); + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_benchmark) { + folly::runBenchmarks(); + } + return RUN_ALL_TESTS(); +} diff --git a/folly/test/Makefile.am b/folly/test/Makefile.am new file mode 100644 index 00000000..c78b32a0 --- /dev/null +++ b/folly/test/Makefile.am @@ -0,0 +1,131 @@ +SUBDIRS = . 
function_benchmark
+
+ACLOCAL_AMFLAGS = -I m4
+
+CPPFLAGS += -Igtest-1.6.0/include
+
+TESTS= \
+	sorted_vector_types_test \
+	foreach_test \
+	hash_test \
+	timeout_queue_test \
+	conv_test \
+	range_test \
+	bits_test \
+	bit_iterator_test
+
+lib_LTLIBRARIES = libgtestmain.la
+libgtestmain_la_CPPFLAGS = -Igtest-1.6.0 -Igtest-1.6.0/src
+libgtestmain_la_SOURCES = gtest-1.6.0/src/gtest-all.cc gtest-1.6.0/src/gtest_main.cc
+
+noinst_HEADERS = FBStringTestBenchmarks.cpp.h \
+	FBVectorTestBenchmarks.cpp.h
+
+noinst_PROGRAMS = benchmark_test
+
+if HAVE_X86_64
+small_locks_test_SOURCES = SmallLocksTest.cpp
+small_locks_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la
+TESTS += small_locks_test
+
+# Fails with WARNING: Logging before InitGoogleLogging() is written to STDERR
+packed_sync_ptr_test_SOURCES = PackedSyncPtrTest.cpp
+packed_sync_ptr_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la
+TESTS += packed_sync_ptr_test
+
+small_vector_test_SOURCES = small_vector_test.cpp
+small_vector_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la
+TESTS += small_vector_test
+
+discriminated_ptr_test_SOURCES = DiscriminatedPtrTest.cpp
+discriminated_ptr_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la
+TESTS += discriminated_ptr_test
+endif
+
+sorted_vector_types_test_SOURCES = sorted_vector_test.cpp
+sorted_vector_types_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la
+
+foreach_test_SOURCES = ForeachTest.cpp
+foreach_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+
+hash_test_SOURCES = HashTest.cpp
+hash_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la
+
+fbstring_test_using_jemalloc_SOURCES = FBStringTest.cpp
+fbstring_test_using_jemalloc_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+TESTS += fbstring_test_using_jemalloc
+
+eventfd_test_SOURCES = EventFDTest.cpp
+eventfd_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+
+thread_cached_int_test_SOURCES = ThreadCachedIntTest.cpp
+thread_cached_int_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+
+thread_local_test_SOURCES = ThreadLocalTest.cpp
+thread_local_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+
+TESTS += eventfd_test thread_cached_int_test thread_local_test
+
+fbvector_test_SOURCES = FBVectorTest.cpp
+fbvector_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+TESTS += fbvector_test
+
+# fails due to cout
+dynamic_test_SOURCES = DynamicTest.cpp
+dynamic_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+TESTS += dynamic_test
+
+# fails due to cout
+json_test_SOURCES = JsonTest.cpp
+json_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+TESTS += json_test
+
+benchmark_test_SOURCES = BenchmarkTest.cpp
+benchmark_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la
+
+# fails due to destructor
+scope_guard_test_SOURCES = ScopeGuardTest.cpp
+scope_guard_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la
+TESTS += scope_guard_test
+
+timeout_queue_test_SOURCES = TimeoutQueueTest.cpp
+timeout_queue_test_LDADD = libgtestmain.la $(top_builddir)/libfollytimeout_queue.la
+
+conv_test_SOURCES = ConvTest.cpp
+conv_test_LDADD = libgtestmain.la 
$(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la + +range_test_SOURCES = RangeTest.cpp +range_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la + +bits_test_SOURCES = BitsTest.cpp +bits_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la + +bit_iterator_test_SOURCES = BitIteratorTest.cpp +bit_iterator_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la + +endian_test_SOURCES = EndianTest.cpp +endian_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la +TESTS += endian_test + +# needs externals/glog to use third-party glog +# rw_spinlock_test_SOURCES = RWSpinLockTest.cpp +# rw_spinlock_test_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la $(BOOST_THREAD_LIBS) +# TESTS += rw_spinlock_test + +synchronized_test_SOURCES = SynchronizedTest.cpp +synchronized_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la +TESTS += synchronized_test + +# needs externals/glog to use third-party glog +# concurrent_skiplist_test_SOURCES = ConcurrentSkipListTest.cpp +# concurrent_skiplist_test_LDADD = libgtestmain.la $(top_builddir)/libfolly.la +# TESTS += concurrent_skiplist_test + +# needs externals/glog to use third-party glog +# concurrent_skiplist_benchmark_SOURCES = ConcurrentSkipListBenchmark.cpp +# concurrent_skiplist_benchmark_LDADD = libgtestmain.la $(top_builddir)/libfollybenchmark.la $(top_builddir)/libfolly.la $(BOOST_THREAD_LIBS) +# noinst_PROGRAMS += concurrent_skiplist_benchmark + +check_PROGRAMS= $(TESTS) diff --git a/folly/test/MapUtilTest.cpp b/folly/test/MapUtilTest.cpp new file mode 100644 index 00000000..50c51c76 --- /dev/null +++ b/folly/test/MapUtilTest.cpp @@ -0,0 +1,34 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/MapUtil.h" + +#include +#include + +using namespace folly; + +TEST(MapUtil, Simple) { + std::map m; + m[1] = 2; + EXPECT_EQ(2, get_default(m, 1, 42)); + EXPECT_EQ(42, get_default(m, 2, 42)); + EXPECT_EQ(0, get_default(m, 3)); + EXPECT_EQ(2, *get_ptr(m, 1)); + EXPECT_TRUE(get_ptr(m, 2) == nullptr); + *get_ptr(m, 1) = 4; + EXPECT_EQ(4, m.at(1)); +} diff --git a/folly/test/PackedSyncPtrTest.cpp b/folly/test/PackedSyncPtrTest.cpp new file mode 100644 index 00000000..a2dd8e5b --- /dev/null +++ b/folly/test/PackedSyncPtrTest.cpp @@ -0,0 +1,133 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "folly/PackedSyncPtr.h"
+
+#include <cstdlib>
+#include <set>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using folly::PackedSyncPtr;
+
+namespace {
+
+// Compile time check for packability. This requires that
+// PackedSyncPtr is a POD struct on gcc.
+struct ignore { PackedSyncPtr<int> foo; char c; } __attribute__((packed));
+static_assert(sizeof(ignore) == 9, "PackedSyncPtr wasn't packable");
+
+}
+
+TEST(PackedSyncPtr, Basic) {
+  PackedSyncPtr<std::pair<int, int>> sp;
+  sp.init(new std::pair<int, int>[2]);
+  EXPECT_EQ(sizeof(sp), 8);
+  sp->first = 5;
+  EXPECT_EQ(sp[0].first, 5);
+  sp[1].second = 7;
+  EXPECT_EQ(sp[1].second, 7);
+  sp.lock();
+  EXPECT_EQ(sp[1].second, 7);
+  sp[0].first = 9;
+  EXPECT_EQ(sp->first, 9);
+  sp.unlock();
+  EXPECT_EQ((sp.get() + 1)->second, 7);
+
+  sp.lock();
+  EXPECT_EQ(sp.extra(), 0);
+  sp.setExtra(0x13);
+  EXPECT_EQ(sp.extra(), 0x13);
+  EXPECT_EQ((sp.get() + 1)->second, 7);
+  delete[] sp.get();
+  auto newP = new std::pair<int, int>();
+  sp.set(newP);
+  EXPECT_EQ(sp.extra(), 0x13);
+  EXPECT_EQ(sp.get(), newP);
+  sp.unlock();
+}
+
+// Here we use the PackedSyncPtr to lock the whole SyncVec (base, *base, and sz)
+template<typename T>
+struct SyncVec {
+  PackedSyncPtr<T> base;
+  SyncVec() { base.init(); }
+  void push_back(const T& t) {
+    base.set((T*) realloc(base.get(),
+                          (base.extra() + 1) * sizeof(T)));
+    base[base.extra()] = t;
+    base.setExtra(base.extra() + 1);
+  }
+  void lock() {
+    base.lock();
+  }
+  void unlock() {
+    base.unlock();
+  }
+
+  T* begin() const { return base.get(); }
+  T* end() const { return base.get() + base.extra(); }
+};
+typedef SyncVec<intptr_t> VecT;
+typedef std::unordered_map<int64_t, VecT> Map;
+const int mapCap = 1317;
+const int nthrs = 297;
+static Map map(mapCap);
+
+// Each app thread inserts its ID into every vec in map.
+// map is read only, so it doesn't need any additional locking.
+void appThread(intptr_t id) {
+  for (auto& kv : map) {
+    kv.second.lock();
+    kv.second.push_back(id);
+    kv.second.unlock();
+  }
+}
+
+TEST(PackedSyncPtr, Application) {
+  for (int64_t i = 0; i < mapCap / 2; ++i) {
+    map.insert(std::make_pair(i, VecT()));
+  }
+  std::vector<std::thread> thrs;
+  for (intptr_t i = 0; i < nthrs; i++) {
+    thrs.push_back(std::thread(appThread, i));
+  }
+  for (auto& t : thrs) {
+    t.join();
+  }
+
+  for (auto& kv : map) {
+    // Make sure every thread successfully inserted its ID into every vec
+    std::set<intptr_t> idsFound;
+    for (auto& elem : kv.second) {
+      EXPECT_TRUE(idsFound.insert(elem).second); // check for dups
+    }
+    EXPECT_EQ(idsFound.size(), nthrs); // check they are all there
+  }
+}
+
+TEST(PackedSyncPtr, extraData) {
+  PackedSyncPtr<int> p;
+  p.init();
+  int* unaligned = reinterpret_cast<int*>(0xf003);
+  p.lock();
+  p.set(unaligned);
+  uintptr_t* bytes = reinterpret_cast<uintptr_t*>(&p);
+  LOG(INFO) << "Bytes integer is: 0x" << std::hex << *bytes;
+  EXPECT_EQ(p.get(), unaligned);
+  p.unlock();
+}
diff --git a/folly/test/ProducerConsumerQueueTest.cpp b/folly/test/ProducerConsumerQueueTest.cpp
new file mode 100644
index 00000000..d322f032
--- /dev/null
+++ b/folly/test/ProducerConsumerQueueTest.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/ProducerConsumerQueue.h" + +#include +#include +#include +#include +#include +#include +#include + +////////////////////////////////////////////////////////////////////// + +namespace { + +template struct TestTraits { + T limit() const { return 1 << 24; } + T generate() const { return rand() % 26; } +}; + +template<> struct TestTraits { + int limit() const { return 1 << 21; } + std::string generate() const { return std::string(12, ' '); } +}; + +template +struct PerfTest { + typedef typename QueueType::value_type T; + + explicit PerfTest() : queue_(Size), done_(false) {} + + void operator()() { + using namespace std::chrono; + auto const startTime = system_clock::now(); + + std::thread producer([this] { this->producer(); }); + std::thread consumer([this] { this->consumer(); }); + + producer.join(); + done_ = true; + consumer.join(); + + auto duration = duration_cast( + system_clock::now() - startTime); + LOG(INFO) << " done: " << duration.count() << "ms"; + } + + void producer() { + for (int i = 0; i < traits_.limit(); ++i) { + while (!queue_.write(traits_.generate())) { + } + } + } + + void consumer() { + while (!done_) { + T data; + queue_.read(data); + } + } + + QueueType queue_; + std::atomic done_; + TestTraits traits_; +}; + +template void doTest(const char* name) { + LOG(INFO) << " testing: " << name; + std::unique_ptr const t(new TestType()); + (*t)(); +} + +template void perfTestType(const char* type) { + const size_t size = 0xfffe; + + LOG(INFO) << "Type: " << type; + doTest,size> >( + "ProducerConsumerQueue"); +} + +template +struct CorrectnessTest { + typedef typename QueueType::value_type T; + + explicit CorrectnessTest() + : queue_(Size) + , done_(false) + { + const size_t testSize = traits_.limit(); + testData_.reserve(testSize); + for (int i = 0; i < testSize; ++i) { + testData_.push_back(traits_.generate()); + } + } + + void operator()() { + std::thread producer([this] { this->producer(); }); + std::thread consumer([this] { this->consumer(); }); + + producer.join(); + done_ = true; + consumer.join(); + } + + void producer() { + for (auto& data : testData_) { + while (!queue_.write(data)) { + } + } + } + + void consumer() { + for (auto& expect : testData_) { + again: + T data; + if (!queue_.read(data)) { + if (done_) { + // Try one more read; unless there's a bug in the queue class + // there should still be more data sitting in the queue even + // though the producer thread exited. 
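+        // (done_ is only set by operator()() after producer.join() has
+        // returned, so no new write can race with this final read; a
+        // single retry is enough to pick up anything still in the queue.)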
+ if (!queue_.read(data)) { + EXPECT_TRUE(0 && "Finished too early ..."); + return; + } + } else { + goto again; + } + } + EXPECT_EQ(data, expect); + } + } + + std::vector testData_; + QueueType queue_; + TestTraits traits_; + std::atomic done_; +}; + +template void correctnessTestType(const std::string& type) { + LOG(INFO) << "Type: " << type; + doTest,0xfffe> >( + "ProducerConsumerQueue"); +} + +struct DtorChecker { + static int numInstances; + DtorChecker() { ++numInstances; } + DtorChecker(const DtorChecker& o) { ++numInstances; } + ~DtorChecker() { --numInstances; } +}; + +int DtorChecker::numInstances = 0; + +} + +////////////////////////////////////////////////////////////////////// + +TEST(PCQ, QueueCorrectness) { + correctnessTestType("string"); + correctnessTestType("int"); + correctnessTestType("unsigned long long"); +} + +TEST(PCQ, PerfTest) { + perfTestType("string"); + perfTestType("int"); + perfTestType("unsigned long long"); +} + +TEST(PCQ, Destructor) { + // Test that orphaned elements in a ProducerConsumerQueue are + // destroyed. + { + folly::ProducerConsumerQueue queue(1024); + for (int i = 0; i < 10; ++i) { + EXPECT_TRUE(queue.write(DtorChecker())); + } + + EXPECT_EQ(DtorChecker::numInstances, 10); + + { + DtorChecker ignore; + EXPECT_TRUE(queue.read(ignore)); + EXPECT_TRUE(queue.read(ignore)); + } + + EXPECT_EQ(DtorChecker::numInstances, 8); + } + + EXPECT_EQ(DtorChecker::numInstances, 0); + + // Test the same thing in the case that the queue write pointer has + // wrapped, but the read one hasn't. + { + folly::ProducerConsumerQueue queue(4); + for (int i = 0; i < 3; ++i) { + EXPECT_TRUE(queue.write(DtorChecker())); + } + EXPECT_EQ(DtorChecker::numInstances, 3); + { + DtorChecker ignore; + EXPECT_TRUE(queue.read(ignore)); + } + EXPECT_EQ(DtorChecker::numInstances, 2); + EXPECT_TRUE(queue.write(DtorChecker())); + EXPECT_EQ(DtorChecker::numInstances, 3); + } + EXPECT_EQ(DtorChecker::numInstances, 0); +} diff --git a/folly/test/RWSpinLockTest.cpp b/folly/test/RWSpinLockTest.cpp new file mode 100644 index 00000000..7c4c901e --- /dev/null +++ b/folly/test/RWSpinLockTest.cpp @@ -0,0 +1,247 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// +// @author xliu (xliux@fb.com) +// + +#include +#include +#include + +#include + +#include "gtest/gtest.h" +#include +#include +#include "folly/RWSpinLock.h" + +DEFINE_int32(num_threads, 8, "num threads"); + +namespace { + +static const int kMaxReaders = 50; +static std::atomic stopThread; +using namespace folly; + +template struct RWSpinLockTest: public testing::Test { + typedef RWSpinLockT RWSpinLockType; +}; + +typedef testing::Types, + RWTicketSpinLockT<32, false>, + RWTicketSpinLockT<64, true>, + RWTicketSpinLockT<64, false> +#endif +> Implementations; + +TYPED_TEST_CASE(RWSpinLockTest, Implementations); + +template +static void run(RWSpinLockType* lock) { + int64_t reads = 0; + int64_t writes = 0; + while (!stopThread.load(std::memory_order_acquire)) { + if (rand() % 10 == 0) { // write + typename RWSpinLockType::WriteHolder guard(lock); + ++writes; + } else { // read + typename RWSpinLockType::ReadHolder guard(lock); + ++reads; + } + } + VLOG(0) << "total reads: " << reads << "; total writes: " << writes; +} + + +TYPED_TEST(RWSpinLockTest, Writer_Wait_Readers) { + typedef typename TestFixture::RWSpinLockType RWSpinLockType; + RWSpinLockType l; + + for (int i = 0; i < kMaxReaders; ++i) { + EXPECT_TRUE(l.try_lock_shared()); + EXPECT_FALSE(l.try_lock()); + } + + for (int i = 0; i < kMaxReaders; ++i) { + EXPECT_FALSE(l.try_lock()); + l.unlock_shared(); + } + + EXPECT_TRUE(l.try_lock()); +} + +TYPED_TEST(RWSpinLockTest, Readers_Wait_Writer) { + typedef typename TestFixture::RWSpinLockType RWSpinLockType; + RWSpinLockType l; + + EXPECT_TRUE(l.try_lock()); + + for (int i = 0; i < kMaxReaders; ++i) { + EXPECT_FALSE(l.try_lock_shared()); + } + + l.unlock_and_lock_shared(); + for (int i = 0; i < kMaxReaders - 1; ++i) { + EXPECT_TRUE(l.try_lock_shared()); + } +} + +TYPED_TEST(RWSpinLockTest, Writer_Wait_Writer) { + typedef typename TestFixture::RWSpinLockType RWSpinLockType; + RWSpinLockType l; + + EXPECT_TRUE(l.try_lock()); + EXPECT_FALSE(l.try_lock()); + l.unlock(); + + EXPECT_TRUE(l.try_lock()); + EXPECT_FALSE(l.try_lock()); +} + +TYPED_TEST(RWSpinLockTest, Read_Holders) { + typedef typename TestFixture::RWSpinLockType RWSpinLockType; + RWSpinLockType l; + + { + typename RWSpinLockType::ReadHolder guard(&l); + EXPECT_FALSE(l.try_lock()); + EXPECT_TRUE(l.try_lock_shared()); + l.unlock_shared(); + + EXPECT_FALSE(l.try_lock()); + } + + EXPECT_TRUE(l.try_lock()); + l.unlock(); +} + +TYPED_TEST(RWSpinLockTest, Write_Holders) { + typedef typename TestFixture::RWSpinLockType RWSpinLockType; + RWSpinLockType l; + { + typename RWSpinLockType::WriteHolder guard(&l); + EXPECT_FALSE(l.try_lock()); + EXPECT_FALSE(l.try_lock_shared()); + } + + EXPECT_TRUE(l.try_lock_shared()); + EXPECT_FALSE(l.try_lock()); + l.unlock_shared(); + EXPECT_TRUE(l.try_lock()); +} + +TYPED_TEST(RWSpinLockTest, ConcurrentTests) { + typedef typename TestFixture::RWSpinLockType RWSpinLockType; + RWSpinLockType l; + srand(time(NULL)); + + std::vector threads; + for (int i = 0; i < FLAGS_num_threads; ++i) { + threads.push_back(boost::thread(&run, &l)); + } + + sleep(1); + stopThread.store(true, std::memory_order_release); + + for (auto& t : threads) { + t.join(); + } +} + +// RWSpinLock specific tests + +TEST(RWSpinLock, lock_unlock_tests) { + folly::RWSpinLock lock; + EXPECT_TRUE(lock.try_lock_upgrade()); + EXPECT_TRUE(lock.try_lock_shared()); + EXPECT_FALSE(lock.try_lock()); + EXPECT_FALSE(lock.try_lock_upgrade()); + lock.unlock_upgrade(); + EXPECT_FALSE(lock.try_lock()); + EXPECT_TRUE(lock.try_lock_upgrade()); + 
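+  // At this point one shared lock and the upgrade lock are both held, so
+  // an exclusive writer must keep failing until they are released below.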
lock.unlock_upgrade(); + lock.unlock_shared(); + EXPECT_TRUE(lock.try_lock()); + EXPECT_FALSE(lock.try_lock_upgrade()); + lock.unlock_and_lock_upgrade(); + EXPECT_TRUE(lock.try_lock_shared()); + lock.unlock_shared(); + lock.unlock_upgrade_and_lock_shared(); + lock.unlock_shared(); + EXPECT_EQ(0, lock.bits()); +} + +TEST(RWSpinLock, concurrent_holder_test) { + srand(time(NULL)); + + folly::RWSpinLock lock; + std::atomic reads(0); + std::atomic writes(0); + std::atomic upgrades(0); + std::atomic stop(false); + + auto go = [&] { + while (!stop.load(std::memory_order_acquire)) { + auto r = (uint32_t)(rand()) % 10; + if (r < 3) { // starts from write lock + RWSpinLock::ReadHolder rg( + RWSpinLock::UpgradedHolder ug( + RWSpinLock::WriteHolder(&lock))); + writes.fetch_add(1, std::memory_order_acq_rel);; + + } else if (r < 6) { // starts from upgrade lock + RWSpinLock::UpgradedHolder ug(&lock); + if (r < 4) { + RWSpinLock::WriteHolder wg(std::move(ug)); + } else { + RWSpinLock::ReadHolder rg(std::move(ug)); + } + upgrades.fetch_add(1, std::memory_order_acq_rel);; + } else { + RWSpinLock::UpgradedHolder ug( + RWSpinLock::WriteHolder( + RWSpinLock::ReadHolder(&lock))); + reads.fetch_add(1, std::memory_order_acq_rel); + } + } + }; + + std::vector threads; + for (int i = 0; i < FLAGS_num_threads; ++i) { + threads.push_back(boost::thread(go)); + } + + sleep(5); + stop.store(true, std::memory_order_release); + + for (auto& t : threads) t.join(); + + LOG(INFO) << "reads: " << reads.load(std::memory_order_acquire) + << "; writes: " << writes.load(std::memory_order_acquire) + << "; upgrades: " << upgrades.load(std::memory_order_acquire); +} + +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + return RUN_ALL_TESTS(); +} diff --git a/folly/test/RangeTest.cpp b/folly/test/RangeTest.cpp new file mode 100644 index 00000000..a98e142f --- /dev/null +++ b/folly/test/RangeTest.cpp @@ -0,0 +1,140 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// +// @author Kristina Holst (kholst@fb.com) +// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) + +#include +#include +#include "folly/Range.h" + +using namespace folly; +using namespace std; + +BOOST_CONCEPT_ASSERT((boost::RandomAccessRangeConcept)); + +TEST(StringPiece, All) { + const char* foo = "foo"; + const char* foo2 = "foo"; + string fooStr(foo); + string foo2Str(foo2); + + // we expect the compiler to optimize things so that there's only one copy + // of the string literal "foo", even though we've got it in multiple places + EXPECT_EQ(foo, foo2); // remember, this uses ==, not strcmp, so it's a ptr + // comparison rather than lexical + + // the string object creates copies though, so the c_str of these should be + // distinct + EXPECT_NE(fooStr.c_str(), foo2Str.c_str()); + + // test the basic StringPiece functionality + StringPiece s(foo); + EXPECT_EQ(s.size(), 3); + + EXPECT_EQ(s.start(), foo); // ptr comparison + EXPECT_NE(s.start(), fooStr.c_str()); // ptr comparison + EXPECT_NE(s.start(), foo2Str.c_str()); // ptr comparison + + EXPECT_EQ(s.toString(), foo); // lexical comparison + EXPECT_EQ(s.toString(), fooStr.c_str()); // lexical comparison + EXPECT_EQ(s.toString(), foo2Str.c_str()); // lexical comparison + + EXPECT_EQ(s, foo); // lexical comparison + EXPECT_EQ(s, fooStr); // lexical comparison + EXPECT_EQ(s, foo2Str); // lexical comparison + EXPECT_EQ(foo, s); + + // check using StringPiece to reference substrings + const char* foobarbaz = "foobarbaz"; + + // the full "foobarbaz" + s.reset(foobarbaz, strlen(foobarbaz)); + EXPECT_EQ(s.size(), 9); + EXPECT_EQ(s.start(), foobarbaz); + EXPECT_EQ(s, "foobarbaz"); + + // only the 'foo' + s.assign(foobarbaz, foobarbaz + 3); + EXPECT_EQ(s.size(), 3); + EXPECT_EQ(s.start(), foobarbaz); + EXPECT_EQ(s, "foo"); + + // find + s.reset(foobarbaz, strlen(foobarbaz)); + EXPECT_EQ(s.find("bar"), 3); + EXPECT_EQ(s.find("ba", 3), 3); + EXPECT_EQ(s.find("ba", 4), 6); + EXPECT_EQ(s.find("notfound"), StringPiece::npos); + EXPECT_EQ(s.find("notfound", 1), StringPiece::npos); + EXPECT_EQ(s.find("bar", 4), StringPiece::npos); // starting position too far + // starting pos that is obviously past the end -- This works for std::string + EXPECT_EQ(s.toString().find("notfound", 55), StringPiece::npos); + EXPECT_EQ(s.find("z", s.size()), StringPiece::npos); + EXPECT_EQ(s.find("z", 55), StringPiece::npos); + + // just "barbaz" + s.reset(foobarbaz + 3, strlen(foobarbaz + 3)); + EXPECT_EQ(s.size(), 6); + EXPECT_EQ(s.start(), foobarbaz + 3); + EXPECT_EQ(s, "barbaz"); + + // just "bar" + s.reset(foobarbaz + 3, 3); + EXPECT_EQ(s.size(), 3); + EXPECT_EQ(s, "bar"); + + // clear + s.clear(); + EXPECT_EQ(s.toString(), ""); + + // test an empty StringPiece + StringPiece s2; + EXPECT_EQ(s2.size(), 0); + + // Test comparison operators + foo = ""; + EXPECT_LE(s, foo); + EXPECT_LE(foo, s); + EXPECT_GE(s, foo); + EXPECT_GE(foo, s); + EXPECT_EQ(s, foo); + EXPECT_EQ(foo, s); + + foo = "abc"; + EXPECT_LE(s, foo); + EXPECT_LT(s, foo); + EXPECT_GE(foo, s); + EXPECT_GT(foo, s); + EXPECT_NE(s, foo); + + EXPECT_LE(s, s); + EXPECT_LE(s, s); + EXPECT_GE(s, s); + EXPECT_GE(s, s); + EXPECT_EQ(s, s); + EXPECT_EQ(s, s); + + s = "abc"; + s2 = "abc"; + EXPECT_LE(s, s2); + EXPECT_LE(s2, s); + EXPECT_GE(s, s2); + EXPECT_GE(s2, s); + EXPECT_EQ(s, s2); + EXPECT_EQ(s2, s); +} diff --git a/folly/test/ScopeGuardTest.cpp b/folly/test/ScopeGuardTest.cpp new file mode 100644 index 00000000..b84f18ce --- /dev/null +++ b/folly/test/ScopeGuardTest.cpp @@ -0,0 +1,258 @@ +/* + * 
Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/ScopeGuard.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <functional>
+#include <stdexcept>
+#include <vector>
+
+using folly::ScopeGuard;
+using folly::makeGuard;
+using std::vector;
+
+double returnsDouble() {
+  return 0.0;
+}
+
+class MyFunctor {
+ public:
+  explicit MyFunctor(int* ptr) : ptr_(ptr) {}
+
+  void operator()() {
+    ++*ptr_;
+  }
+
+ private:
+  int* ptr_;
+};
+
+TEST(ScopeGuard, DifferentWaysToBind) {
+  {
+    // There is implicit conversion from func pointer
+    // double (*)() to function<void()>.
+    ScopeGuard g = makeGuard(returnsDouble);
+  }
+
+  vector<int> v;
+  void (vector<int>::*push_back)(int const&) = &vector<int>::push_back;
+
+  v.push_back(1);
+  {
+    // binding to member function.
+    ScopeGuard g = makeGuard(std::bind(&vector<int>::pop_back, &v));
+  }
+  EXPECT_EQ(0, v.size());
+
+  {
+    // bind member function with args. v is passed-by-value!
+    ScopeGuard g = makeGuard(std::bind(push_back, v, 2));
+  }
+  EXPECT_EQ(0, v.size()); // push_back happened on a copy of v... fail!
+
+  // pass in an argument by pointer so as to avoid a copy.
+  {
+    ScopeGuard g = makeGuard(std::bind(push_back, &v, 4));
+  }
+  EXPECT_EQ(1, v.size());
+
+  {
+    // pass in an argument by reference so as to avoid a copy.
+    ScopeGuard g = makeGuard(std::bind(push_back, std::ref(v), 4));
+  }
+  EXPECT_EQ(2, v.size());
+
+  // lambda with a reference to v
+  {
+    ScopeGuard g = makeGuard([&] { v.push_back(5); });
+  }
+  EXPECT_EQ(3, v.size());
+
+  // lambda with a copy of v
+  {
+    ScopeGuard g = makeGuard([v] () mutable { v.push_back(6); });
+  }
+  EXPECT_EQ(3, v.size());
+
+  // functor object
+  int n = 0;
+  {
+    MyFunctor f(&n);
+    ScopeGuard g = makeGuard(f);
+  }
+  EXPECT_EQ(1, n);
+
+  // temporary functor object
+  n = 0;
+  {
+    ScopeGuard g = makeGuard(MyFunctor(&n));
+  }
+  EXPECT_EQ(1, n);
+
+  // Use auto instead of ScopeGuard
+  n = 2;
+  {
+    auto g = makeGuard(MyFunctor(&n));
+  }
+  EXPECT_EQ(3, n);
+
+  // Use const auto& instead of ScopeGuard
+  n = 10;
+  {
+    const auto& g = makeGuard(MyFunctor(&n));
+  }
+  EXPECT_EQ(11, n);
+}
+
+TEST(ScopeGuard, GuardException) {
+  EXPECT_DEATH({
+    ScopeGuard g = makeGuard([&] {
+      throw std::runtime_error("destructors should never throw!");
+    });
+  },
+  "destructors should never throw");
+}
+
+/**
+ * Add an integer to a vector iff it was inserted into the
+ * db successfully. Here is a schematic of how you would accomplish
+ * this with scope guard.
+ */
+void testUndoAction(bool failure) {
+  vector<int> v;
+  { // defines a "mini" scope
+
+    // be optimistic and insert this into memory
+    v.push_back(1);
+
+    // The guard is triggered to undo the insertion unless dismiss() is called.
+    ScopeGuard guard = makeGuard([&] { v.pop_back(); });
+
+    // Do some action; use the failure argument to pretend
+    // that it failed or succeeded.
+
+    // if there was no failure, dismiss the undo guard action.
+    if (!failure) {
+      guard.dismiss();
+    }
+  } // all stack allocated in the mini-scope will be destroyed here.
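+  // The guard's cleanup ran at the closing brace above unless dismiss()
+  // was called, i.e. exactly in the failure case verified below.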
+ + if (failure) { + EXPECT_EQ(0, v.size()); // the action failed => undo insertion + } else { + EXPECT_EQ(1, v.size()); // the action succeeded => keep insertion + } +} + +TEST(ScopeGuard, UndoAction) { + testUndoAction(true); + testUndoAction(false); +} + +/** + * Sometimes in a try catch block we want to execute a piece of code + * regardless if an exception happened or not. For example, you want + * to close a db connection regardless if an exception was thrown during + * insertion. In Java and other languages there is a finally clause that + * helps accomplish this: + * + * try { + * dbConn.doInsert(sql); + * } catch (const DbException& dbe) { + * dbConn.recordFailure(dbe); + * } catch (const CriticalException& e) { + * throw e; // re-throw the exception + * } finally { + * dbConn.closeConnection(); // executes no matter what! + * } + * + * We can approximate this behavior in C++ with ScopeGuard. + */ +enum class ErrorBehavior { + SUCCESS, + HANDLED_ERROR, + UNHANDLED_ERROR, +}; + +void testFinally(ErrorBehavior error) { + bool cleanupOccurred = false; + + try { + ScopeGuard guard = makeGuard([&] { cleanupOccurred = true; }); + + try { + if (error == ErrorBehavior::HANDLED_ERROR) { + throw std::runtime_error("throwing an expected error"); + } else if (error == ErrorBehavior::UNHANDLED_ERROR) { + throw "never throw raw strings"; + } + } catch (const std::runtime_error&) { + } + } catch (...) { + // Outer catch to swallow the error for the UNHANDLED_ERROR behavior + } + + EXPECT_TRUE(cleanupOccurred); +} + +TEST(ScopeGuard, TryCatchFinally) { + testFinally(ErrorBehavior::SUCCESS); + testFinally(ErrorBehavior::HANDLED_ERROR); + testFinally(ErrorBehavior::UNHANDLED_ERROR); +} + +TEST(ScopeGuard, TEST_SCOPE_EXIT) { + int x = 0; + { + SCOPE_EXIT { ++x; }; + EXPECT_EQ(0, x); + } + EXPECT_EQ(1, x); +} + +class Foo { +public: + Foo() {} + ~Foo() { + try { + auto e = std::current_exception(); + int test = 0; + { + SCOPE_EXIT { ++test; }; + EXPECT_EQ(0, test); + } + EXPECT_EQ(1, test); + } catch (const std::exception& ex) { + LOG(FATAL) << "Unexpected exception: " << ex.what(); + } + } +}; + +TEST(ScopeGuard, TEST_SCOPE_FAILURE2) { + try { + Foo f; + throw std::runtime_error("test"); + } catch (...) { + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + return RUN_ALL_TESTS(); +} diff --git a/folly/test/SmallLocksTest.cpp b/folly/test/SmallLocksTest.cpp new file mode 100644 index 00000000..1de46ff1 --- /dev/null +++ b/folly/test/SmallLocksTest.cpp @@ -0,0 +1,142 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "folly/SmallLocks.h"
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <mutex>
+#include <string>
+#include <vector>
+#include <pthread.h>
+#include <unistd.h>
+
+#include <thread>
+
+#include <gtest/gtest.h>
+
+using std::string;
+using folly::MicroSpinLock;
+using folly::PicoSpinLock;
+using folly::MSLGuard;
+
+namespace {
+
+struct LockedVal {
+  int ar[1024];
+  MicroSpinLock lock;
+
+  LockedVal() {
+    lock.init();
+    memset(ar, 0, sizeof ar);
+  }
+};
+
+// Compile time test for packed struct support (requires that both of
+// these classes are POD).
+struct ignore1 { MicroSpinLock msl; int16_t foo; } __attribute__((packed));
+struct ignore2 { PicoSpinLock<uint32_t> psl; int16_t foo; }
+  __attribute__((packed));
+static_assert(sizeof(ignore1) == 3, "Size check failed");
+static_assert(sizeof(ignore2) == 6, "Size check failed");
+
+LockedVal v;
+void splock_test() {
+
+  const int max = 1000;
+  unsigned int seed = (uintptr_t)pthread_self();
+  for (int i = 0; i < max; i++) {
+    asm("pause");
+    MSLGuard g(v.lock);
+
+    int first = v.ar[0];
+    for (int i = 1; i < sizeof v.ar / sizeof i; ++i) {
+      EXPECT_EQ(first, v.ar[i]);
+    }
+
+    int byte = rand_r(&seed);
+    memset(v.ar, char(byte), sizeof v.ar);
+  }
+}
+
+template<class T> struct PslTest {
+  PicoSpinLock<T> lock;
+
+  PslTest() { lock.init(); }
+
+  void doTest() {
+    T ourVal = rand() % (T(1) << (sizeof(T) * 8 - 1));
+    for (int i = 0; i < 10000; ++i) {
+      std::lock_guard<PicoSpinLock<T>> guard(lock);
+      lock.setData(ourVal);
+      for (int n = 0; n < 10; ++n) {
+        asm volatile("pause");
+        EXPECT_EQ(lock.getData(), ourVal);
+      }
+    }
+  }
+};
+
+template<class T>
+void doPslTest() {
+  PslTest<T> testObj;
+
+  const int nthrs = 17;
+  std::vector<std::thread> threads;
+  for (int i = 0; i < nthrs; ++i) {
+    threads.push_back(std::thread(&PslTest<T>::doTest, &testObj));
+  }
+  for (auto& t : threads) {
+    t.join();
+  }
+}
+
+}
+
+TEST(SmallLocks, SpinLockCorrectness) {
+  EXPECT_EQ(sizeof(MicroSpinLock), 1);
+
+  int nthrs = sysconf(_SC_NPROCESSORS_ONLN) * 2;
+  std::vector<std::thread> threads;
+  for (int i = 0; i < nthrs; ++i) {
+    threads.push_back(std::thread(splock_test));
+  }
+  for (auto& t : threads) {
+    t.join();
+  }
+}
+
+TEST(SmallLocks, PicoSpinCorrectness) {
+  doPslTest<int16_t>();
+  doPslTest<uint16_t>();
+  doPslTest<int32_t>();
+  doPslTest<uint32_t>();
+  doPslTest<int64_t>();
+  doPslTest<uint64_t>();
+}
+
+TEST(SmallLocks, PicoSpinSigned) {
+  typedef PicoSpinLock<int16_t, 0> Lock;
+  Lock val;
+  val.init(-4);
+  EXPECT_EQ(val.getData(), -4);
+
+  {
+    std::lock_guard<Lock> guard(val);
+    EXPECT_EQ(val.getData(), -4);
+    val.setData(-8);
+    EXPECT_EQ(val.getData(), -8);
+  }
+  EXPECT_EQ(val.getData(), -8);
+}
diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp
new file mode 100644
index 00000000..eab3912b
--- /dev/null
+++ b/folly/test/StringTest.cpp
@@ -0,0 +1,645 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "folly/String.h" + +#include +#include +#include + +#include "folly/Benchmark.h" + +using namespace folly; +using namespace std; + +TEST(StringPrintf, BasicTest) { + EXPECT_EQ("abc", stringPrintf("%s", "abc")); + EXPECT_EQ("abc", stringPrintf("%sbc", "a")); + EXPECT_EQ("abc", stringPrintf("a%sc", "b")); + EXPECT_EQ("abc", stringPrintf("ab%s", "c")); + + EXPECT_EQ("abc", stringPrintf("abc")); +} + +TEST(StringPrintf, NumericFormats) { + EXPECT_EQ("12", stringPrintf("%d", 12)); + EXPECT_EQ("5000000000", stringPrintf("%ld", 5000000000UL)); + EXPECT_EQ("5000000000", stringPrintf("%ld", 5000000000L)); + EXPECT_EQ("-5000000000", stringPrintf("%ld", -5000000000L)); + EXPECT_EQ("-1", stringPrintf("%d", 0xffffffff)); + EXPECT_EQ("-1", stringPrintf("%ld", 0xffffffffffffffff)); + EXPECT_EQ("-1", stringPrintf("%ld", 0xffffffffffffffffUL)); + + EXPECT_EQ("7.7", stringPrintf("%1.1f", 7.7)); + EXPECT_EQ("7.7", stringPrintf("%1.1lf", 7.7)); + EXPECT_EQ("7.70000000000000018", + stringPrintf("%.17f", 7.7)); + EXPECT_EQ("7.70000000000000018", + stringPrintf("%.17lf", 7.7)); +} + +TEST(StringPrintf, Appending) { + string s; + stringAppendf(&s, "a%s", "b"); + stringAppendf(&s, "%c", 'c'); + EXPECT_EQ(s, "abc"); + stringAppendf(&s, " %d", 123); + EXPECT_EQ(s, "abc 123"); +} + +TEST(StringPrintf, VariousSizes) { + // Test a wide variety of output sizes + for (int i = 0; i < 100; ++i) { + string expected(i + 1, 'a'); + EXPECT_EQ("X" + expected + "X", stringPrintf("X%sX", expected.c_str())); + } + + EXPECT_EQ("abc12345678910111213141516171819202122232425xyz", + stringPrintf("abc%d%d%d%d%d%d%d%d%d%d%d%d%d%d" + "%d%d%d%d%d%d%d%d%d%d%dxyz", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25)); +} + +TEST(StringPrintf, oldStringPrintfTests) { + EXPECT_EQ(string("a/b/c/d"), + stringPrintf("%s/%s/%s/%s", "a", "b", "c", "d")); + + EXPECT_EQ(string(" 5 10"), + stringPrintf("%5d %5d", 5, 10)); + + // check printing w/ a big buffer + for (int size = (1 << 8); size <= (1 << 15); size <<= 1) { + string a(size, 'z'); + string b = stringPrintf("%s", a.c_str()); + EXPECT_EQ(a.size(), b.size()); + } +} + +TEST(StringPrintf, oldStringAppendf) { + string s = "hello"; + stringAppendf(&s, "%s/%s/%s/%s", "a", "b", "c", "d"); + EXPECT_EQ(string("helloa/b/c/d"), s); +} + +BENCHMARK(new_stringPrintfSmall, iters) { + for (int64_t i = 0; i < iters; ++i) { + int32_t x = int32_t(i); + int32_t y = int32_t(i + 1); + string s = + stringPrintf("msg msg msg msg msg msg msg msg: %d, %d, %s", + x, y, "hello"); + } +} + +TEST(Escape, cEscape) { + EXPECT_EQ("hello world", cEscape("hello world")); + EXPECT_EQ("hello \\\\world\\\" goodbye", + cEscape("hello \\world\" goodbye")); + EXPECT_EQ("hello\\nworld", cEscape("hello\nworld")); + EXPECT_EQ("hello\\377\\376", cEscape("hello\xff\xfe")); +} + +TEST(Escape, cUnescape) { + EXPECT_EQ("hello world", cUnescape("hello world")); + EXPECT_EQ("hello \\world\" goodbye", + cUnescape("hello \\\\world\\\" goodbye")); + EXPECT_EQ("hello\nworld", cUnescape("hello\\nworld")); + EXPECT_EQ("hello\nworld", cUnescape("hello\\012world")); + EXPECT_EQ("hello\nworld", cUnescape("hello\\x0aworld")); + EXPECT_EQ("hello\xff\xfe", cUnescape("hello\\377\\376")); + EXPECT_EQ("hello\xff\xfe", cUnescape("hello\\xff\\xfe")); + + EXPECT_THROW({cUnescape("hello\\");}, + std::invalid_argument); + EXPECT_THROW({cUnescape("hello\\x");}, + std::invalid_argument); + EXPECT_THROW({cUnescape("hello\\q");}, + std::invalid_argument); +} + +namespace { +fbstring bmString; +fbstring 
bmEscapedString; +fbstring escapedString; +fbstring unescapedString; +const size_t kBmStringLength = 64 << 10; +const uint32_t kPrintablePercentage = 90; + +void initBenchmark() { + bmString.reserve(kBmStringLength); + + std::mt19937 rnd; + std::uniform_int_distribution printable(32, 126); + std::uniform_int_distribution nonPrintable(0, 160); + std::uniform_int_distribution percentage(0, 99); + + for (size_t i = 0; i < kBmStringLength; ++i) { + unsigned char c; + if (percentage(rnd) < kPrintablePercentage) { + c = printable(rnd); + } else { + c = nonPrintable(rnd); + // Generate characters in both non-printable ranges: + // 0..31 and 127..255 + if (c >= 32) { + c += (126 - 32) + 1; + } + } + bmString.push_back(c); + } + + bmEscapedString = cEscape(bmString); +} + +BENCHMARK(BM_cEscape, iters) { + while (iters--) { + escapedString = cEscape(bmString); + doNotOptimizeAway(escapedString.size()); + } +} + +BENCHMARK(BM_cUnescape, iters) { + while (iters--) { + unescapedString = cUnescape(bmEscapedString); + doNotOptimizeAway(unescapedString.size()); + } +} + +} // namespace + +namespace { + +double pow2(int exponent) { + return double(int64_t(1) << exponent); +} + +} // namespace + +TEST(PrettyPrint, Basic) { + // check time printing + EXPECT_EQ(string("8.53e+07 s "), prettyPrint(85.3e6, PRETTY_TIME)); + EXPECT_EQ(string("85.3 s "), prettyPrint(85.3, PRETTY_TIME)); + EXPECT_EQ(string("85.3 ms"), prettyPrint(85.3e-3, PRETTY_TIME)); + EXPECT_EQ(string("85.3 us"), prettyPrint(85.3e-6, PRETTY_TIME)); + EXPECT_EQ(string("85.3 ns"), prettyPrint(85.3e-9, PRETTY_TIME)); + EXPECT_EQ(string("85.3 ps"), prettyPrint(85.3e-12, PRETTY_TIME)); + EXPECT_EQ(string("8.53e-14 s "), prettyPrint(85.3e-15, PRETTY_TIME)); + + EXPECT_EQ(string("0 s "), prettyPrint(0, PRETTY_TIME)); + EXPECT_EQ(string("1 s "), prettyPrint(1.0, PRETTY_TIME)); + EXPECT_EQ(string("1 ms"), prettyPrint(1.0e-3, PRETTY_TIME)); + EXPECT_EQ(string("1 us"), prettyPrint(1.0e-6, PRETTY_TIME)); + EXPECT_EQ(string("1 ns"), prettyPrint(1.0e-9, PRETTY_TIME)); + EXPECT_EQ(string("1 ps"), prettyPrint(1.0e-12, PRETTY_TIME)); + + // check bytes printing + EXPECT_EQ(string("853 B "), prettyPrint(853., PRETTY_BYTES)); + EXPECT_EQ(string("833 kB"), prettyPrint(853.e3, PRETTY_BYTES)); + EXPECT_EQ(string("813.5 MB"), prettyPrint(853.e6, PRETTY_BYTES)); + EXPECT_EQ(string("7.944 GB"), prettyPrint(8.53e9, PRETTY_BYTES)); + EXPECT_EQ(string("794.4 GB"), prettyPrint(853.e9, PRETTY_BYTES)); + EXPECT_EQ(string("775.8 TB"), prettyPrint(853.e12, PRETTY_BYTES)); + + EXPECT_EQ(string("0 B "), prettyPrint(0, PRETTY_BYTES)); + EXPECT_EQ(string("1 B "), prettyPrint(pow2(0), PRETTY_BYTES)); + EXPECT_EQ(string("1 kB"), prettyPrint(pow2(10), PRETTY_BYTES)); + EXPECT_EQ(string("1 MB"), prettyPrint(pow2(20), PRETTY_BYTES)); + EXPECT_EQ(string("1 GB"), prettyPrint(pow2(30), PRETTY_BYTES)); + EXPECT_EQ(string("1 TB"), prettyPrint(pow2(40), PRETTY_BYTES)); + + // check bytes metric printing + EXPECT_EQ(string("853 B "), prettyPrint(853., PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("853 kB"), prettyPrint(853.e3, PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("853 MB"), prettyPrint(853.e6, PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("8.53 GB"), prettyPrint(8.53e9, PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("853 GB"), prettyPrint(853.e9, PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("853 TB"), prettyPrint(853.e12, PRETTY_BYTES_METRIC)); + + EXPECT_EQ(string("0 B "), prettyPrint(0, PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("1 B "), prettyPrint(1.0, PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("1 
kB"), prettyPrint(1.0e+3, PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("1 MB"), prettyPrint(1.0e+6, PRETTY_BYTES_METRIC)); + + EXPECT_EQ(string("1 GB"), prettyPrint(1.0e+9, PRETTY_BYTES_METRIC)); + EXPECT_EQ(string("1 TB"), prettyPrint(1.0e+12, PRETTY_BYTES_METRIC)); + + // check metric-units (powers of 1000) printing + EXPECT_EQ(string("853 "), prettyPrint(853., PRETTY_UNITS_METRIC)); + EXPECT_EQ(string("853 k"), prettyPrint(853.e3, PRETTY_UNITS_METRIC)); + EXPECT_EQ(string("853 M"), prettyPrint(853.e6, PRETTY_UNITS_METRIC)); + EXPECT_EQ(string("8.53 bil"), prettyPrint(8.53e9, PRETTY_UNITS_METRIC)); + EXPECT_EQ(string("853 bil"), prettyPrint(853.e9, PRETTY_UNITS_METRIC)); + EXPECT_EQ(string("853 tril"), prettyPrint(853.e12, PRETTY_UNITS_METRIC)); + + // check binary-units (powers of 1024) printing + EXPECT_EQ(string("0 "), prettyPrint(0, PRETTY_UNITS_BINARY)); + EXPECT_EQ(string("1 "), prettyPrint(pow2(0), PRETTY_UNITS_BINARY)); + EXPECT_EQ(string("1 k"), prettyPrint(pow2(10), PRETTY_UNITS_BINARY)); + EXPECT_EQ(string("1 M"), prettyPrint(pow2(20), PRETTY_UNITS_BINARY)); + EXPECT_EQ(string("1 G"), prettyPrint(pow2(30), PRETTY_UNITS_BINARY)); + EXPECT_EQ(string("1 T"), prettyPrint(pow2(40), PRETTY_UNITS_BINARY)); + + EXPECT_EQ(string("1023 "), + prettyPrint(pow2(10) - 1, PRETTY_UNITS_BINARY)); + EXPECT_EQ(string("1024 k"), + prettyPrint(pow2(20) - 1, PRETTY_UNITS_BINARY)); + EXPECT_EQ(string("1024 M"), + prettyPrint(pow2(30) - 1, PRETTY_UNITS_BINARY)); + EXPECT_EQ(string("1024 G"), + prettyPrint(pow2(40) - 1, PRETTY_UNITS_BINARY)); + + // check that negative values work + EXPECT_EQ(string("-85.3 s "), prettyPrint(-85.3, PRETTY_TIME)); + EXPECT_EQ(string("-85.3 ms"), prettyPrint(-85.3e-3, PRETTY_TIME)); + EXPECT_EQ(string("-85.3 us"), prettyPrint(-85.3e-6, PRETTY_TIME)); + EXPECT_EQ(string("-85.3 ns"), prettyPrint(-85.3e-9, PRETTY_TIME)); +} + +TEST(PrettyPrint, HexDump) { + std::string a("abc\x00\x02\xa0", 6); // embedded NUL + EXPECT_EQ( + "00000000 61 62 63 00 02 a0 " + "|abc... 
|\n", + hexDump(a.data(), a.size())); + + a = "abcdefghijklmnopqrstuvwxyz"; + EXPECT_EQ( + "00000000 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70 " + "|abcdefghijklmnop|\n" + "00000010 71 72 73 74 75 76 77 78 79 7a " + "|qrstuvwxyz |\n", + hexDump(a.data(), a.size())); +} + +TEST(System, errnoStr) { + errno = EACCES; + EXPECT_EQ(EACCES, errno); + EXPECT_EQ(EACCES, errno); // twice to make sure EXPECT_EQ doesn't change it + + fbstring expected = strerror(ENOENT); + + errno = EACCES; + EXPECT_EQ(expected, errnoStr(ENOENT)); + // Ensure that errno isn't changed + EXPECT_EQ(EACCES, errno); + + // Per POSIX, all errno values are positive, so -1 is invalid + errnoStr(-1); + + // Ensure that errno isn't changed + EXPECT_EQ(EACCES, errno); +} + +namespace folly_test { +struct ThisIsAVeryLongStructureName { +}; +} // namespace folly_test + +TEST(System, demangle) { + EXPECT_EQ("folly_test::ThisIsAVeryLongStructureName", + demangle(typeid(folly_test::ThisIsAVeryLongStructureName))); +} + +namespace { + +template class VectorType> +void splitTest() { + VectorType > parts; + + folly::split(',', "a,b,c", parts); + EXPECT_EQ(parts.size(), 3); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "b"); + EXPECT_EQ(parts[2], "c"); + parts.clear(); + + folly::split(',', string("a,b,c"), parts); + EXPECT_EQ(parts.size(), 3); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "b"); + EXPECT_EQ(parts[2], "c"); + parts.clear(); + + folly::split(',', "a,,c", parts); + EXPECT_EQ(parts.size(), 3); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], ""); + EXPECT_EQ(parts[2], "c"); + parts.clear(); + + folly::split(',', string("a,,c"), parts); + EXPECT_EQ(parts.size(), 3); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], ""); + EXPECT_EQ(parts[2], "c"); + parts.clear(); + + folly::split(',', "a,,c", parts, true); + EXPECT_EQ(parts.size(), 2); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "c"); + parts.clear(); + + folly::split(',', string("a,,c"), parts, true); + EXPECT_EQ(parts.size(), 2); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "c"); + parts.clear(); + + folly::split(',', string(",,a,,c,,,"), parts, true); + EXPECT_EQ(parts.size(), 2); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "c"); + parts.clear(); + + // test multiple split w/o clear + folly::split(',', ",,a,,c,,,", parts, true); + EXPECT_EQ(parts.size(), 2); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "c"); + folly::split(',', ",,a,,c,,,", parts, true); + EXPECT_EQ(parts.size(), 4); + EXPECT_EQ(parts[2], "a"); + EXPECT_EQ(parts[3], "c"); + parts.clear(); + + // test splits that with multi-line delimiter + folly::split("ab", "dabcabkdbkab", parts, true); + EXPECT_EQ(parts.size(), 3); + EXPECT_EQ(parts[0], "d"); + EXPECT_EQ(parts[1], "c"); + EXPECT_EQ(parts[2], "kdbk"); + parts.clear(); + + string orig = "ab2342asdfv~~!"; + folly::split("", orig, parts, true); + EXPECT_EQ(parts.size(), 1); + EXPECT_EQ(parts[0], orig); + parts.clear(); + + folly::split("452x;o38asfsajsdlfdf.j", "asfds", parts, true); + EXPECT_EQ(parts.size(), 1); + EXPECT_EQ(parts[0], "asfds"); + parts.clear(); + + folly::split("a", "", parts, true); + EXPECT_EQ(parts.size(), 0); + parts.clear(); + + folly::split("a", "", parts); + EXPECT_EQ(parts.size(), 1); + EXPECT_EQ(parts[0], ""); + parts.clear(); + + folly::split("a", "abcdefg", parts, true); + EXPECT_EQ(parts.size(), 1); + EXPECT_EQ(parts[0], "bcdefg"); + parts.clear(); + + orig = "All, , your bases, are , , belong to us"; + folly::split(", ", orig, parts, true); + EXPECT_EQ(parts.size(), 4); + EXPECT_EQ(parts[0], 
"All"); + EXPECT_EQ(parts[1], "your bases"); + EXPECT_EQ(parts[2], "are "); + EXPECT_EQ(parts[3], "belong to us"); + parts.clear(); + folly::split(", ", orig, parts); + EXPECT_EQ(parts.size(), 6); + EXPECT_EQ(parts[0], "All"); + EXPECT_EQ(parts[1], ""); + EXPECT_EQ(parts[2], "your bases"); + EXPECT_EQ(parts[3], "are "); + EXPECT_EQ(parts[4], ""); + EXPECT_EQ(parts[5], "belong to us"); + parts.clear(); + + orig = ", Facebook, rul,es!, "; + folly::split(", ", orig, parts, true); + EXPECT_EQ(parts.size(), 2); + EXPECT_EQ(parts[0], "Facebook"); + EXPECT_EQ(parts[1], "rul,es!"); + parts.clear(); + folly::split(", ", orig, parts); + EXPECT_EQ(parts.size(), 4); + EXPECT_EQ(parts[0], ""); + EXPECT_EQ(parts[1], "Facebook"); + EXPECT_EQ(parts[2], "rul,es!"); + EXPECT_EQ(parts[3], ""); +} + +template class VectorType> +void piecesTest() { + VectorType > pieces; + VectorType > pieces2; + + folly::split(',', "a,b,c", pieces); + EXPECT_EQ(pieces.size(), 3); + EXPECT_EQ(pieces[0], "a"); + EXPECT_EQ(pieces[1], "b"); + EXPECT_EQ(pieces[2], "c"); + + pieces.clear(); + + folly::split(',', "a,,c", pieces); + EXPECT_EQ(pieces.size(), 3); + EXPECT_EQ(pieces[0], "a"); + EXPECT_EQ(pieces[1], ""); + EXPECT_EQ(pieces[2], "c"); + pieces.clear(); + + folly::split(',', "a,,c", pieces, true); + EXPECT_EQ(pieces.size(), 2); + EXPECT_EQ(pieces[0], "a"); + EXPECT_EQ(pieces[1], "c"); + pieces.clear(); + + folly::split(',', ",,a,,c,,,", pieces, true); + EXPECT_EQ(pieces.size(), 2); + EXPECT_EQ(pieces[0], "a"); + EXPECT_EQ(pieces[1], "c"); + pieces.clear(); + + // test multiple split w/o clear + folly::split(',', ",,a,,c,,,", pieces, true); + EXPECT_EQ(pieces.size(), 2); + EXPECT_EQ(pieces[0], "a"); + EXPECT_EQ(pieces[1], "c"); + folly::split(',', ",,a,,c,,,", pieces, true); + EXPECT_EQ(pieces.size(), 4); + EXPECT_EQ(pieces[2], "a"); + EXPECT_EQ(pieces[3], "c"); + pieces.clear(); + + // test multiple split rounds + folly::split(",", "a_b,c_d", pieces); + EXPECT_EQ(pieces.size(), 2); + EXPECT_EQ(pieces[0], "a_b"); + EXPECT_EQ(pieces[1], "c_d"); + folly::split("_", pieces[0], pieces2); + EXPECT_EQ(pieces2.size(), 2); + EXPECT_EQ(pieces2[0], "a"); + EXPECT_EQ(pieces2[1], "b"); + pieces2.clear(); + folly::split("_", pieces[1], pieces2); + EXPECT_EQ(pieces2.size(), 2); + EXPECT_EQ(pieces2[0], "c"); + EXPECT_EQ(pieces2[1], "d"); + pieces.clear(); + pieces2.clear(); + + // test splits that with multi-line delimiter + folly::split("ab", "dabcabkdbkab", pieces, true); + EXPECT_EQ(pieces.size(), 3); + EXPECT_EQ(pieces[0], "d"); + EXPECT_EQ(pieces[1], "c"); + EXPECT_EQ(pieces[2], "kdbk"); + pieces.clear(); + + string orig = "ab2342asdfv~~!"; + folly::split("", orig.c_str(), pieces, true); + EXPECT_EQ(pieces.size(), 1); + EXPECT_EQ(pieces[0], orig); + pieces.clear(); + + folly::split("452x;o38asfsajsdlfdf.j", "asfds", pieces, true); + EXPECT_EQ(pieces.size(), 1); + EXPECT_EQ(pieces[0], "asfds"); + pieces.clear(); + + folly::split("a", "", pieces, true); + EXPECT_EQ(pieces.size(), 0); + pieces.clear(); + + folly::split("a", "", pieces); + EXPECT_EQ(pieces.size(), 1); + EXPECT_EQ(pieces[0], ""); + pieces.clear(); + + folly::split("a", "abcdefg", pieces, true); + EXPECT_EQ(pieces.size(), 1); + EXPECT_EQ(pieces[0], "bcdefg"); + pieces.clear(); + + orig = "All, , your bases, are , , belong to us"; + folly::split(", ", orig, pieces, true); + EXPECT_EQ(pieces.size(), 4); + EXPECT_EQ(pieces[0], "All"); + EXPECT_EQ(pieces[1], "your bases"); + EXPECT_EQ(pieces[2], "are "); + EXPECT_EQ(pieces[3], "belong to us"); + pieces.clear(); + 
folly::split(", ", orig, pieces); + EXPECT_EQ(pieces.size(), 6); + EXPECT_EQ(pieces[0], "All"); + EXPECT_EQ(pieces[1], ""); + EXPECT_EQ(pieces[2], "your bases"); + EXPECT_EQ(pieces[3], "are "); + EXPECT_EQ(pieces[4], ""); + EXPECT_EQ(pieces[5], "belong to us"); + pieces.clear(); + + orig = ", Facebook, rul,es!, "; + folly::split(", ", orig, pieces, true); + EXPECT_EQ(pieces.size(), 2); + EXPECT_EQ(pieces[0], "Facebook"); + EXPECT_EQ(pieces[1], "rul,es!"); + pieces.clear(); + folly::split(", ", orig, pieces); + EXPECT_EQ(pieces.size(), 4); + EXPECT_EQ(pieces[0], ""); + EXPECT_EQ(pieces[1], "Facebook"); + EXPECT_EQ(pieces[2], "rul,es!"); + EXPECT_EQ(pieces[3], ""); + pieces.clear(); + + const char* str = "a,b"; + folly::split(',', StringPiece(str), pieces); + EXPECT_EQ(pieces.size(), 2); + EXPECT_EQ(pieces[0], "a"); + EXPECT_EQ(pieces[1], "b"); + EXPECT_EQ(pieces[0].start(), str); + EXPECT_EQ(pieces[1].start(), str + 2); + + std::set unique; + folly::splitTo(":", "asd:bsd:asd:asd:bsd:csd::asd", + std::inserter(unique, unique.begin()), true); + EXPECT_EQ(unique.size(), 3); + if (unique.size() == 3) { + EXPECT_EQ(*unique.begin(), "asd"); + EXPECT_EQ(*--unique.end(), "csd"); + } + + VectorType > blah; + folly::split('-', "a-b-c-d-f-e", blah); + EXPECT_EQ(blah.size(), 6); +} + +} + +TEST(Split, split_vector) { + splitTest(); +} +TEST(Split, split_fbvector) { + splitTest(); +} +TEST(Split, pieces_vector) { + piecesTest(); +} +TEST(Split, pieces_fbvector) { + piecesTest(); +} + +////////////////////////////////////////////////////////////////////// + +BENCHMARK(splitOnSingleChar, iters) { + const std::string line = "one:two:three:four"; + for (int i = 0; i < iters << 4; ++i) { + std::vector pieces; + folly::split(':', line, pieces); + } +} + +BENCHMARK(splitStr, iters) { + const std::string line = "one-*-two-*-three-*-four"; + for (int i = 0; i < iters << 4; ++i) { + std::vector pieces; + folly::split("-*-", line, pieces); + } +} + +BENCHMARK(boost_splitOnSingleChar, iters) { + std::string line = "one:two:three:four"; + for (int i = 0; i < iters << 4; ++i) { + std::vector> pieces; + boost::split(pieces, line, [] (char c) { return c == ':'; }); + } +} + +int main(int argc, char *argv[]) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret) { + initBenchmark(); + if (FLAGS_benchmark) { + folly::runBenchmarks(); + } + } + return ret; +} + diff --git a/folly/test/SynchronizedTest.cpp b/folly/test/SynchronizedTest.cpp new file mode 100644 index 00000000..a4942930 --- /dev/null +++ b/folly/test/SynchronizedTest.cpp @@ -0,0 +1,106 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+// @author: Andrei Alexandrescu (aalexandre)
+
+// Test bed for folly/Synchronized.h
+
+#include "folly/Synchronized.h"
+#include "folly/RWSpinLock.h"
+#include "folly/test/SynchronizedTestLib.h"
+#include <gtest/gtest.h>
+
+
+TEST(Synchronized, Basic) {
+  testBasic<std::mutex>();
+  testBasic<std::recursive_mutex>();
+  testBasic<std::timed_mutex>();
+  testBasic<std::recursive_timed_mutex>();
+
+  testBasic<folly::RWSpinLock>();
+
+  testBasic<boost::mutex>();
+  testBasic<boost::recursive_mutex>();
+  testBasic<boost::timed_mutex>();
+  testBasic<boost::recursive_timed_mutex>();
+  testBasic<boost::shared_mutex>();
+}
+
+TEST(Synchronized, Concurrency) {
+  testConcurrency<std::mutex>();
+  testConcurrency<std::recursive_mutex>();
+  testConcurrency<std::timed_mutex>();
+  testConcurrency<std::recursive_timed_mutex>();
+
+  testConcurrency<folly::RWSpinLock>();
+
+  testConcurrency<boost::mutex>();
+  testConcurrency<boost::recursive_mutex>();
+  testConcurrency<boost::timed_mutex>();
+  testConcurrency<boost::recursive_timed_mutex>();
+  testConcurrency<boost::shared_mutex>();
+}
+
+
+TEST(Synchronized, DualLocking) {
+  testDualLocking<std::mutex>();
+  testDualLocking<std::recursive_mutex>();
+  testDualLocking<std::timed_mutex>();
+  testDualLocking<std::recursive_timed_mutex>();
+
+  testDualLocking<folly::RWSpinLock>();
+
+  testDualLocking<boost::mutex>();
+  testDualLocking<boost::recursive_mutex>();
+  testDualLocking<boost::timed_mutex>();
+  testDualLocking<boost::recursive_timed_mutex>();
+  testDualLocking<boost::shared_mutex>();
+}
+
+
+TEST(Synchronized, DualLockingWithConst) {
+  testDualLockingWithConst<std::mutex>();
+  testDualLockingWithConst<std::recursive_mutex>();
+  testDualLockingWithConst<std::timed_mutex>();
+  testDualLockingWithConst<std::recursive_timed_mutex>();
+
+  testDualLockingWithConst<folly::RWSpinLock>();
+
+  testDualLockingWithConst<boost::mutex>();
+  testDualLockingWithConst<boost::recursive_mutex>();
+  testDualLockingWithConst<boost::timed_mutex>();
+  testDualLockingWithConst<boost::recursive_timed_mutex>();
+  testDualLockingWithConst<boost::shared_mutex>();
+}
+
+
+TEST(Synchronized, TimedSynchronized) {
+  testTimedSynchronized<std::timed_mutex>();
+  testTimedSynchronized<std::recursive_timed_mutex>();
+
+  testTimedSynchronized<boost::timed_mutex>();
+  testTimedSynchronized<boost::recursive_timed_mutex>();
+  testTimedSynchronized<boost::shared_mutex>();
+}
+
+TEST(Synchronized, ConstCopy) {
+  testConstCopy<std::timed_mutex>();
+  testConstCopy<std::recursive_timed_mutex>();
+
+  testConstCopy<boost::timed_mutex>();
+  testConstCopy<boost::recursive_timed_mutex>();
+  testConstCopy<boost::shared_mutex>();
+}
diff --git a/folly/test/SynchronizedTestLib-inl.h b/folly/test/SynchronizedTestLib-inl.h
new file mode 100644
index 00000000..aebb839b
--- /dev/null
+++ b/folly/test/SynchronizedTestLib-inl.h
@@ -0,0 +1,291 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef FOLLY_TEST_SYNCHRONIZEDTESTLIB_INL_H +#define FOLLY_TEST_SYNCHRONIZEDTESTLIB_INL_H + +#include + +#include +#include +#include +#include +#include +#include +#include "folly/Foreach.h" +#include "folly/Random.h" +#include "folly/Synchronized.h" + + +static const auto seed = folly::randomNumberSeed(); +typedef std::mt19937 RandomT; +static RandomT rng(seed); + +template +Integral2 random(Integral1 low, Integral2 up) { + std::uniform_int_distribution<> range(low, up); + return range(rng); +} + +template +void testBasic() { + folly::Synchronized, Mutex> obj; + + obj->resize(1000); + + auto obj2 = obj; + EXPECT_EQ(obj2->size(), 1000); + + SYNCHRONIZED (obj) { + obj.push_back(10); + EXPECT_EQ(obj.size(), 1001); + EXPECT_EQ(obj.back(), 10); + EXPECT_EQ(obj2->size(), 1000); + + UNSYNCHRONIZED(obj) { + EXPECT_EQ(obj->size(), 1001); + } + } + + SYNCHRONIZED_CONST (obj) { + EXPECT_EQ(obj.size(), 1001); + UNSYNCHRONIZED(obj) { + EXPECT_EQ(obj->size(), 1001); + } + } + + SYNCHRONIZED (lockedObj, *&obj) { + lockedObj.front() = 2; + } + + EXPECT_EQ(obj->size(), 1001); + EXPECT_EQ(obj->back(), 10); + EXPECT_EQ(obj2->size(), 1000); + + EXPECT_EQ(FB_ARG_2_OR_1(1, 2), 2); + EXPECT_EQ(FB_ARG_2_OR_1(1), 1); +} + +template void testConcurrency() { + folly::Synchronized, Mutex> v; + + struct Local { + static bool threadMain(int i, + folly::Synchronized, Mutex>& pv) { + usleep(::random(100 * 1000, 1000 * 1000)); + + // Test operator-> + pv->push_back(2 * i); + + // Aaand test the SYNCHRONIZED macro + SYNCHRONIZED (v, pv) { + v.push_back(2 * i + 1); + } + + return true; + } + }; + + std::vector results; + + static const size_t threads = 100; + FOR_EACH_RANGE (i, 0, threads) { + results.push_back(std::thread([&, i]() { Local::threadMain(i, v); })); + } + + FOR_EACH (i, results) { + i->join(); + } + + std::vector result; + v.swap(result); + + EXPECT_EQ(result.size(), 2 * threads); + sort(result.begin(), result.end()); + + FOR_EACH_RANGE (i, 0, 2 * threads) { + EXPECT_EQ(result[i], i); + } +} + +template void testDualLocking() { + folly::Synchronized, Mutex> v; + folly::Synchronized, Mutex> m; + + struct Local { + static bool threadMain( + int i, + folly::Synchronized, Mutex>& pv, + folly::Synchronized, Mutex>& pm) { + + usleep(::random(100 * 1000, 1000 * 1000)); + + if (i & 1) { + SYNCHRONIZED_DUAL (v, pv, m, pm) { + v.push_back(i); + m[i] = i + 1; + } + } else { + SYNCHRONIZED_DUAL (m, pm, v, pv) { + v.push_back(i); + m[i] = i + 1; + } + } + + return true; + } + }; + + std::vector results; + + static const size_t threads = 100; + FOR_EACH_RANGE (i, 0, threads) { + results.push_back( + std::thread([&, i]() { Local::threadMain(i, v, m); })); + } + + FOR_EACH (i, results) { + i->join(); + } + + std::vector result; + v.swap(result); + + EXPECT_EQ(result.size(), threads); + sort(result.begin(), result.end()); + + FOR_EACH_RANGE (i, 0, threads) { + EXPECT_EQ(result[i], i); + } +} + +template void testDualLockingWithConst() { + folly::Synchronized, Mutex> v; + folly::Synchronized, Mutex> m; + + struct Local { + static bool threadMain( + int i, + folly::Synchronized, Mutex>& pv, + const folly::Synchronized, Mutex>& pm) { + + usleep(::random(100 * 1000, 1000 * 1000)); + + if (i & 1) { + SYNCHRONIZED_DUAL (v, pv, m, pm) { + size_t s = m.size(); + v.push_back(i); + } + } else { + SYNCHRONIZED_DUAL (m, pm, v, pv) { + size_t s = m.size(); + v.push_back(i); + } + } + + return true; + } + }; + + std::vector results; + + static const size_t threads = 100; + FOR_EACH_RANGE (i, 0, threads) { + results.push_back( + 
std::thread([&, i]() { Local::threadMain(i, v, m); })); + } + + FOR_EACH (i, results) { + i->join(); + } + + std::vector result; + v.swap(result); + + EXPECT_EQ(result.size(), threads); + sort(result.begin(), result.end()); + + FOR_EACH_RANGE (i, 0, threads) { + EXPECT_EQ(result[i], i); + } +} + +template void testTimedSynchronized() { + folly::Synchronized, Mutex> v; + + struct Local { + static bool threadMain(int i, + folly::Synchronized, Mutex>& pv) { + usleep(::random(100 * 1000, 1000 * 1000)); + + // Test operator-> + pv->push_back(2 * i); + + // Aaand test the TIMED_SYNCHRONIZED macro + for (;;) + TIMED_SYNCHRONIZED (10, v, pv) { + if (v) { + usleep(::random(15 * 1000, 150 * 1000)); + v->push_back(2 * i + 1); + return true; + } + else { + // do nothing + usleep(::random(10 * 1000, 100 * 1000)); + } + } + + return true; + } + }; + + std::vector results; + + static const size_t threads = 100; + FOR_EACH_RANGE (i, 0, threads) { + results.push_back(std::thread([&, i]() { Local::threadMain(i, v); })); + } + + FOR_EACH (i, results) { + i->join(); + } + + std::vector result; + v.swap(result); + + EXPECT_EQ(result.size(), 2 * threads); + sort(result.begin(), result.end()); + + FOR_EACH_RANGE (i, 0, 2 * threads) { + EXPECT_EQ(result[i], i); + } +} + +template void testConstCopy() { + std::vector input = {1, 2, 3}; + const folly::Synchronized, Mutex> v(input); + + std::vector result; + + v.copy(&result); + EXPECT_EQ(result, input); + + result = v.copy(); + EXPECT_EQ(result, input); +} + + +#endif /* FOLLY_TEST_SYNCHRONIZEDTESTLIB_INL_H */ diff --git a/folly/test/SynchronizedTestLib.h b/folly/test/SynchronizedTestLib.h new file mode 100644 index 00000000..faaef0f9 --- /dev/null +++ b/folly/test/SynchronizedTestLib.h @@ -0,0 +1,47 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_TEST_SYNCHRONIZEDTESTLIB_H +#define FOLLY_TEST_SYNCHRONIZEDTESTLIB_H + +// We have mutex types outside of folly that we want to test with Synchronized. +// Make it easy for mutex implementators to test their classes with +// Synchronized by just having a test like: +// +// class MyMutex { ... }; +// +// TEST(Synchronized, Basic) { +// testBasic(); +// } +// +// ... similar for testConcurrency, testDualLocking, etc. + + +template void testBasic(); + +template void testConcurrency(); + +template void testDualLocking(); + +template void testDualLockingWithConst(); + +template void testTimedSynchronized(); + +template void testConstCopy(); + +#include "folly/test/SynchronizedTestLib-inl.h" + +#endif /* FOLLY_TEST_SYNCHRONIZEDTESTLIB_H */ diff --git a/folly/test/ThreadCachedArenaTest.cpp b/folly/test/ThreadCachedArenaTest.cpp new file mode 100644 index 00000000..f5c83cbc --- /dev/null +++ b/folly/test/ThreadCachedArenaTest.cpp @@ -0,0 +1,265 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/ThreadCachedArena.h" +#include "folly/StlAllocator.h" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "folly/Range.h" +#include "folly/Benchmark.h" + +using namespace folly; + +namespace { + +class ArenaTester { + public: + explicit ArenaTester(ThreadCachedArena& arena) : arena_(&arena) { } + + void allocate(size_t count, size_t maxSize); + void verify(); + void merge(ArenaTester&& other); + + private: + std::mutex mergeMutex_; + std::vector>> areas_; + ThreadCachedArena* arena_; +}; + +void ArenaTester::allocate(size_t count, size_t maxSize) { + // Allocate chunks of memory of random sizes + std::mt19937 rnd; + std::uniform_int_distribution sizeDist(1, maxSize - 1); + areas_.clear(); + areas_.reserve(count); + for (size_t i = 0; i < count; i++) { + size_t size = sizeDist(rnd); + uint8_t* p = static_cast(arena_->allocate(size)); + areas_.emplace_back(rnd() & 0xff, Range(p, size)); + } + + // Fill each area with a different value, to prove that they don't overlap + // Fill in random order. + std::random_shuffle( + areas_.begin(), areas_.end(), + [&rnd] (int n) -> int { + return std::uniform_int_distribution(0, n-1)(rnd); + }); + + for (auto& p : areas_) { + std::fill(p.second.begin(), p.second.end(), p.first); + } +} + +void ArenaTester::verify() { + for (auto& p : areas_) { + for (auto v : p.second) { + EXPECT_EQ(p.first, v); + } + } +} + +void ArenaTester::merge(ArenaTester&& other) { + { + std::lock_guard lock(mergeMutex_); + std::move(other.areas_.begin(), other.areas_.end(), + std::back_inserter(areas_)); + } + other.areas_.clear(); +} + +} // namespace + +TEST(ThreadCachedArena, BlockSize) { + struct Align { char c; } __attribute__((aligned)); + static const size_t alignment = alignof(Align); + static const size_t requestedBlockSize = 64; + + ThreadCachedArena arena(requestedBlockSize); + size_t blockSize = alignment; + uint8_t* prev = static_cast(arena.allocate(1)); + + // Keep allocating until we're no longer one single alignment away from the + // previous allocation -- that's when we've gotten to the next block. 
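// A sketch of the two ways this file exercises ThreadCachedArena. Direct
// allocation, which is what ArenaTester does: the arena hands out
// non-overlapping byte ranges and releases them all when it is destroyed,
// so there is no per-allocation free. And through the
// StlAllocator<ThreadCachedArena, T> adapter, which the StlAllocator test
// and the benchmarks below use. The sizes here are arbitrary:
//
//   folly::ThreadCachedArena arena(64);  // requested block size, in bytes
//   auto* p = static_cast<uint8_t*>(arena.allocate(16));
//   std::fill(p, p + 16, 0xab);          // [p, p + 16) is exclusively ours
//
//   std::vector<int, StlAllocator<ThreadCachedArena, int>>
//       v(StlAllocator<ThreadCachedArena, int>(&arena));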
+ uint8_t* p; + while ((p = static_cast(arena.allocate(1))) == + prev + alignment) { + prev = p; + blockSize += alignment; + } + + VLOG(1) << "Requested block size: " << requestedBlockSize << ", actual: " + << blockSize; + EXPECT_LE(requestedBlockSize, blockSize); +} + +TEST(ThreadCachedArena, SingleThreaded) { + static const size_t requestedBlockSize = 64; + ThreadCachedArena arena(requestedBlockSize); + ArenaTester tester(arena); + tester.allocate(100, 100 << 10); + tester.verify(); +} + +TEST(ThreadCachedArena, MultiThreaded) { + static const size_t requestedBlockSize = 64; + ThreadCachedArena arena(requestedBlockSize); + ArenaTester mainTester(arena); + + // Do this twice, to catch the possibility that memory from the first + // round gets freed + static const size_t numThreads = 20; + for (size_t i = 0; i < 2; i++) { + std::vector threads; + threads.reserve(numThreads); + for (size_t j = 0; j < numThreads; j++) { + threads.emplace_back( + [&arena, &mainTester] () { + ArenaTester tester(arena); + tester.allocate(500, 1 << 10); + tester.verify(); + mainTester.merge(std::move(tester)); + }); + } + for (auto& t : threads) { + t.join(); + } + } + + mainTester.verify(); +} + +TEST(ThreadCachedArena, StlAllocator) { + typedef std::unordered_map< + int, int, std::hash, std::equal_to, + StlAllocator>> Map; + + static const size_t requestedBlockSize = 64; + ThreadCachedArena arena(requestedBlockSize); + + Map map {0, std::hash(), std::equal_to(), + StlAllocator>(&arena)}; + + for (int i = 0; i < 1000; i++) { + map[i] = i; + } + + for (int i = 0; i < 1000; i++) { + EXPECT_EQ(i, map[i]); + } +} + +namespace { + +static const int kNumValues = 10000; + +BENCHMARK(bmUMStandard, iters) { + typedef std::unordered_map Map; + + while (iters--) { + Map map {0}; + for (int i = 0; i < kNumValues; i++) { + map[i] = i; + } + } +} + +BENCHMARK(bmUMArena, iters) { + typedef std::unordered_map< + int, int, std::hash, std::equal_to, + StlAllocator>> Map; + + while (iters--) { + ThreadCachedArena arena; + + Map map {0, std::hash(), std::equal_to(), + StlAllocator>( + &arena)}; + + for (int i = 0; i < kNumValues; i++) { + map[i] = i; + } + } +} + +BENCHMARK_DRAW_LINE() + +BENCHMARK(bmMStandard, iters) { + typedef std::map Map; + + while (iters--) { + Map map; + for (int i = 0; i < kNumValues; i++) { + map[i] = i; + } + } +} + +BENCHMARK_DRAW_LINE() + +BENCHMARK(bmMArena, iters) { + typedef std::map< + int, int, std::less, + StlAllocator>> Map; + + while (iters--) { + ThreadCachedArena arena; + + Map map {std::less(), + StlAllocator>( + &arena)}; + + for (int i = 0; i < kNumValues; i++) { + map[i] = i; + } + } +} + +BENCHMARK_DRAW_LINE() + +} // namespace + + +// Benchmark Iters Total t t/iter iter/sec +// ---------------------------------------------------------------------------- +// Comparing benchmarks: bmUMStandard,bmUMArena +// + 143% bmUMStandard 1570 2.005 s 1.277 ms 782.9 +// * bmUMArena 3817 2.003 s 524.7 us 1.861 k +// ---------------------------------------------------------------------------- +// Comparing benchmarks: bmMStandard,bmMArena +// +79.0% bmMStandard 1197 2.009 s 1.678 ms 595.8 +// * bmMArena 2135 2.002 s 937.6 us 1.042 k +// ---------------------------------------------------------------------------- + +int main(int argc, char *argv[]) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret && FLAGS_benchmark) { + folly::runBenchmarks(); + } + return ret; +} + diff --git a/folly/test/ThreadCachedIntTest.cpp 
b/folly/test/ThreadCachedIntTest.cpp new file mode 100644 index 00000000..703fffdd --- /dev/null +++ b/folly/test/ThreadCachedIntTest.cpp @@ -0,0 +1,282 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/ThreadCachedInt.h" +#include "folly/Hash.h" + +#include +#include +#include +#include +#include +#include "folly/Benchmark.h" + +using namespace folly; + +TEST(ThreadCachedInt, SingleThreadedNotCached) { + ThreadCachedInt val(0, 0); + EXPECT_EQ(0, val.readFast()); + ++val; + EXPECT_EQ(1, val.readFast()); + for (int i = 0; i < 41; ++i) { + val.increment(1); + } + EXPECT_EQ(42, val.readFast()); + --val; + EXPECT_EQ(41, val.readFast()); +} + +// Note: This is somewhat fragile to the implementation. If this causes +// problems, feel free to remove it. +TEST(ThreadCachedInt, SingleThreadedCached) { + ThreadCachedInt val(0, 10); + EXPECT_EQ(0, val.readFast()); + ++val; + EXPECT_EQ(0, val.readFast()); + for (int i = 0; i < 7; ++i) { + val.increment(1); + } + EXPECT_EQ(0, val.readFast()); + EXPECT_EQ(0, val.readFastAndReset()); + EXPECT_EQ(8, val.readFull()); + EXPECT_EQ(8, val.readFullAndReset()); + EXPECT_EQ(0, val.readFull()); + EXPECT_EQ(0, val.readFast()); +} + +ThreadCachedInt globalInt32(0, 11); +ThreadCachedInt globalInt64(0, 11); +int kNumInserts = 100000; +DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks."); +#define CREATE_INC_FUNC(size) \ + void incFunc ## size () { \ + const int num = kNumInserts / FLAGS_numThreads; \ + for (int i = 0; i < num; ++i) { \ + ++globalInt ## size ; \ + } \ + } +CREATE_INC_FUNC(64); +CREATE_INC_FUNC(32); + +// Confirms counts are accurate with competing threads +TEST(ThreadCachedInt, MultiThreadedCached) { + kNumInserts = 100000; + CHECK_EQ(0, kNumInserts % FLAGS_numThreads) << + "FLAGS_numThreads must evenly divide kNumInserts (" << kNumInserts << ")."; + const int numPerThread = kNumInserts / FLAGS_numThreads; + ThreadCachedInt TCInt64(0, numPerThread - 2); + { + std::atomic run(true); + std::atomic threadsDone(0); + std::vector threads; + for (int i = 0; i < FLAGS_numThreads; ++i) { + threads.push_back(std::thread([&] { + FOR_EACH_RANGE(k, 0, numPerThread) { + ++TCInt64; + } + std::atomic_fetch_add(&threadsDone, 1); + while (run.load()) { usleep(100); } + })); + } + + // We create and increment another ThreadCachedInt here to make sure it + // doesn't interact with the other instances + ThreadCachedInt otherTCInt64(0, 10); + otherTCInt64.set(33); + ++otherTCInt64; + + while (threadsDone.load() < FLAGS_numThreads) { usleep(100); } + + ++otherTCInt64; + + // Threads are done incrementing, but caches have not been flushed yet, so + // we have to readFull. 
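// A sketch of the read/write API these tests exercise (the exact flush
// behavior is an implementation detail; the numbers here are arbitrary):
//
//   folly::ThreadCachedInt<int64_t> counter(0, 1000);  // initial, cacheSize
//   ++counter;                  // usually only bumps a thread-local cache
//   counter.readFast();         // cheap, but may lag by up to roughly
//                               //   cacheSize uncommitted increments per
//                               //   live thread
//   counter.readFull();         // accurate: folds in every thread's cache
//   counter.readFullAndReset(); // accurate read, then zeroes the counter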
+  EXPECT_NE(kNumInserts, TCInt64.readFast());
+  EXPECT_EQ(kNumInserts, TCInt64.readFull());
+
+  run.store(false);
+  for (auto& t : threads) {
+    t.join();
+  }
+
+  }  // Caches are flushed when threads finish
+  EXPECT_EQ(kNumInserts, TCInt64.readFast());
+}
+
+#define MAKE_MT_CACHE_SIZE_BM(size)                          \
+  void BM_mt_cache_size ## size (int iters, int cacheSize) { \
+    kNumInserts = iters;                                     \
+    globalInt ## size.set(0);                                \
+    globalInt ## size.setCacheSize(cacheSize);               \
+    std::vector<std::thread> threads;                        \
+    for (int i = 0; i < FLAGS_numThreads; ++i) {             \
+      threads.push_back(std::thread(incFunc ## size));       \
+    }                                                        \
+    for (auto& t : threads) {                                \
+      t.join();                                              \
+    }                                                        \
+  }
+MAKE_MT_CACHE_SIZE_BM(64);
+MAKE_MT_CACHE_SIZE_BM(32);
+
+#define REG_BASELINE(name, inc_stmt)                         \
+  BENCHMARK(FB_CONCATENATE(BM_mt_baseline_, name), iters) {  \
+    const int iterPerThread = iters / FLAGS_numThreads;      \
+    std::vector<std::thread> threads;                        \
+    for (int i = 0; i < FLAGS_numThreads; ++i) {             \
+      threads.push_back(std::thread([&]() {                  \
+        for (int i = 0; i < iterPerThread; ++i) {            \
+          inc_stmt;                                          \
+        }                                                    \
+      }));                                                   \
+    }                                                        \
+    for (auto& t : threads) {                                \
+      t.join();                                              \
+    }                                                        \
+  }
+
+ThreadLocal<int64_t> globalTL64Baseline;
+ThreadLocal<int32_t> globalTL32Baseline;
+std::atomic<int64_t> globalInt64Baseline(0);
+std::atomic<int32_t> globalInt32Baseline(0);
+__thread int64_t global__thread64;
+__thread int32_t global__thread32;
+
+// Alternate lock-free implementation. Achieves about the same performance,
+// but uses about 20x more memory than ThreadCachedInt with 24 threads.
+struct ShardedAtomicInt {
+  static const int64_t kBuckets_ = 2048;
+  std::atomic<int64_t> ints_[kBuckets_];
+
+  inline void inc(int64_t val = 1) {
+    int bucket = hash::twang_mix64(pthread_self()) & (kBuckets_ - 1);
+    std::atomic_fetch_add(&ints_[bucket], val);
+  }
+
+  // read the first few and extrapolate
+  int64_t readFast() {
+    int64_t ret = 0;
+    static const int numToRead = 8;
+    FOR_EACH_RANGE(i, 0, numToRead) {
+      ret += ints_[i].load(std::memory_order_relaxed);
+    }
+    return ret * (kBuckets_ / numToRead);
+  }
+
+  // readFull is lock-free, but has to do thousands of loads...
+  int64_t readFull() {
+    int64_t ret = 0;
+    for (auto& i : ints_) {
+      // Fun fact - using memory_order_consume below reduces perf 30-40% in high
+      // contention benchmarks.
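// And the arithmetic behind readFast() above, made concrete: with
// kBuckets_ = 2048 and numToRead = 8, it sums buckets 0..7 and scales by
// 2048 / 8 = 256. So if those eight buckets hold a total of 40 increments,
// readFast() reports 40 * 256 = 10240. That is an estimate, and it is only
// close when twang_mix64(pthread_self()) spreads writers evenly over the
// buckets.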
+ ret += i.load(std::memory_order_relaxed); + } + return ret; + } +}; +ShardedAtomicInt shd_int64; + +REG_BASELINE(_thread64, global__thread64 += 1); +REG_BASELINE(_thread32, global__thread32 += 1); +REG_BASELINE(ThreadLocal64, *globalTL64Baseline += 1); +REG_BASELINE(ThreadLocal32, *globalTL32Baseline += 1); +REG_BASELINE(atomic_inc64, std::atomic_fetch_add(&globalInt64Baseline, 1L)); +REG_BASELINE(atomic_inc32, std::atomic_fetch_add(&globalInt32Baseline, 1)); +REG_BASELINE(ShardedAtm64, shd_int64.inc()); + +BENCHMARK_PARAM(BM_mt_cache_size64, 0); +BENCHMARK_PARAM(BM_mt_cache_size64, 10); +BENCHMARK_PARAM(BM_mt_cache_size64, 100); +BENCHMARK_PARAM(BM_mt_cache_size64, 1000); +BENCHMARK_PARAM(BM_mt_cache_size32, 0); +BENCHMARK_PARAM(BM_mt_cache_size32, 10); +BENCHMARK_PARAM(BM_mt_cache_size32, 100); +BENCHMARK_PARAM(BM_mt_cache_size32, 1000); +BENCHMARK_DRAW_LINE(); + +// single threaded +BENCHMARK(Atomic_readFull) { + doNotOptimizeAway(globalInt64Baseline.load(std::memory_order_relaxed)); +} +BENCHMARK(ThrCache_readFull) { + doNotOptimizeAway(globalInt64.readFull()); +} +BENCHMARK(Sharded_readFull) { + doNotOptimizeAway(shd_int64.readFull()); +} +BENCHMARK(ThrCache_readFast) { + doNotOptimizeAway(globalInt64.readFast()); +} +BENCHMARK(Sharded_readFast) { + doNotOptimizeAway(shd_int64.readFast()); +} +BENCHMARK_DRAW_LINE(); + +// multi threaded +REG_BASELINE(Atomic_readFull, + doNotOptimizeAway(globalInt64Baseline.load(std::memory_order_relaxed))); +REG_BASELINE(ThrCache_readFull, doNotOptimizeAway(globalInt64.readFull())); +REG_BASELINE(Sharded_readFull, doNotOptimizeAway(shd_int64.readFull())); +REG_BASELINE(ThrCache_readFast, doNotOptimizeAway(globalInt64.readFast())); +REG_BASELINE(Sharded_readFast, doNotOptimizeAway(shd_int64.readFast())); +BENCHMARK_DRAW_LINE(); + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + google::SetCommandLineOptionWithMode( + "bm_max_iters", "10000000", google::SET_FLAG_IF_DEFAULT + ); + if (FLAGS_benchmark) { + folly::runBenchmarks(); + } + return RUN_ALL_TESTS(); +} + +/* + Ran with 20 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches + + Benchmark Iters Total t t/iter iter/sec + ------------------------------------------------------------------------------ + + 103% BM_mt_baseline__thread64 10000000 13.54 ms 1.354 ns 704.4 M +* BM_mt_baseline__thread32 10000000 6.651 ms 665.1 ps 1.4 G + +50.3% BM_mt_baseline_ThreadLocal64 10000000 9.994 ms 999.4 ps 954.2 M + +49.9% BM_mt_baseline_ThreadLocal32 10000000 9.972 ms 997.2 ps 956.4 M + +2650% BM_mt_baseline_atomic_inc64 10000000 182.9 ms 18.29 ns 52.13 M + +2665% BM_mt_baseline_atomic_inc32 10000000 183.9 ms 18.39 ns 51.85 M + +75.3% BM_mt_baseline_ShardedAtm64 10000000 11.66 ms 1.166 ns 817.8 M + +6670% BM_mt_cache_size64/0 10000000 450.3 ms 45.03 ns 21.18 M + +1644% BM_mt_cache_size64/10 10000000 116 ms 11.6 ns 82.2 M + + 381% BM_mt_cache_size64/100 10000000 32.04 ms 3.204 ns 297.7 M + + 129% BM_mt_cache_size64/1000 10000000 15.24 ms 1.524 ns 625.8 M + +6052% BM_mt_cache_size32/0 10000000 409.2 ms 40.92 ns 23.31 M + +1304% BM_mt_cache_size32/10 10000000 93.39 ms 9.339 ns 102.1 M + + 298% BM_mt_cache_size32/100 10000000 26.52 ms 2.651 ns 359.7 M + +68.1% BM_mt_cache_size32/1000 10000000 11.18 ms 1.118 ns 852.9 M +------------------------------------------------------------------------------ + +10.4% Atomic_readFull 10000000 36.05 ms 3.605 ns 264.5 M + + 619% ThrCache_readFull 10000000 235.1 ms 23.51 ns 40.57 M + SLOW 
Sharded_readFull 1981093 2 s 1.01 us 967.3 k +* ThrCache_readFast 10000000 32.65 ms 3.265 ns 292.1 M + +10.0% Sharded_readFast 10000000 35.92 ms 3.592 ns 265.5 M +------------------------------------------------------------------------------ + +4.54% BM_mt_baseline_Atomic_readFull 10000000 8.672 ms 867.2 ps 1.074 G + SLOW BM_mt_baseline_ThrCache_readFull 10000000 996.9 ms 99.69 ns 9.567 M + SLOW BM_mt_baseline_Sharded_readFull 10000000 891.5 ms 89.15 ns 10.7 M +* BM_mt_baseline_ThrCache_readFast 10000000 8.295 ms 829.5 ps 1.123 G + +12.7% BM_mt_baseline_Sharded_readFast 10000000 9.348 ms 934.8 ps 1020 M +------------------------------------------------------------------------------ +*/ diff --git a/folly/test/ThreadLocalTest.cpp b/folly/test/ThreadLocalTest.cpp new file mode 100644 index 00000000..d80b851e --- /dev/null +++ b/folly/test/ThreadLocalTest.cpp @@ -0,0 +1,359 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/ThreadLocal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "folly/Benchmark.h" + +using namespace folly; + +struct Widget { + static int totalVal_; + int val_; + ~Widget() { + totalVal_ += val_; + } + + static void customDeleter(Widget* w, TLPDestructionMode mode) { + totalVal_ += (mode == TLPDestructionMode::ALL_THREADS) * 1000; + delete w; + } +}; +int Widget::totalVal_ = 0; + +TEST(ThreadLocalPtr, BasicDestructor) { + Widget::totalVal_ = 0; + ThreadLocalPtr w; + std::thread([&w]() { + w.reset(new Widget()); + w.get()->val_ += 10; + }).join(); + EXPECT_EQ(10, Widget::totalVal_); +} + +TEST(ThreadLocalPtr, CustomDeleter1) { + Widget::totalVal_ = 0; + { + ThreadLocalPtr w; + std::thread([&w]() { + w.reset(new Widget(), Widget::customDeleter); + w.get()->val_ += 10; + }).join(); + EXPECT_EQ(10, Widget::totalVal_); + } + EXPECT_EQ(10, Widget::totalVal_); +} + +// Test deleting the ThreadLocalPtr object +TEST(ThreadLocalPtr, CustomDeleter2) { + Widget::totalVal_ = 0; + std::thread t; + std::mutex mutex; + std::condition_variable cv; + enum class State { + START, + DONE, + EXIT + }; + State state = State::START; + { + ThreadLocalPtr w; + t = std::thread([&]() { + w.reset(new Widget(), Widget::customDeleter); + w.get()->val_ += 10; + + // Notify main thread that we're done + { + std::unique_lock lock(mutex); + state = State::DONE; + cv.notify_all(); + } + + // Wait for main thread to allow us to exit + { + std::unique_lock lock(mutex); + while (state != State::EXIT) { + cv.wait(lock); + } + } + }); + + // Wait for main thread to start (and set w.get()->val_) + { + std::unique_lock lock(mutex); + while (state != State::DONE) { + cv.wait(lock); + } + } + + // Thread started but hasn't exited yet + EXPECT_EQ(0, Widget::totalVal_); + + // Destroy ThreadLocalPtr (by letting it go out of scope) + } + + EXPECT_EQ(1010, Widget::totalVal_); + + // Allow thread to exit + { + std::unique_lock lock(mutex); + state = State::EXIT; + 
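// The two teardown paths Widget::customDeleter distinguishes, which is what
// the CustomDeleter2 test is arranging: if the owning thread exits while the
// ThreadLocalPtr is still alive, the deleter runs in per-thread mode (no
// +1000); if the ThreadLocalPtr itself is destroyed while a thread still
// holds an instance, it runs with TLPDestructionMode::ALL_THREADS, hence the
// expected totalVal_ of 1010:
//
//   folly::ThreadLocalPtr<Widget> w;
//   w.reset(new Widget(), Widget::customDeleter);  // deleter sees the mode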
cv.notify_all(); + } + t.join(); + + EXPECT_EQ(1010, Widget::totalVal_); +} + +TEST(ThreadLocal, BasicDestructor) { + Widget::totalVal_ = 0; + ThreadLocal w; + std::thread([&w]() { w->val_ += 10; }).join(); + EXPECT_EQ(10, Widget::totalVal_); +} + +TEST(ThreadLocal, SimpleRepeatDestructor) { + Widget::totalVal_ = 0; + { + ThreadLocal w; + w->val_ += 10; + } + { + ThreadLocal w; + w->val_ += 10; + } + EXPECT_EQ(20, Widget::totalVal_); +} + +TEST(ThreadLocal, InterleavedDestructors) { + Widget::totalVal_ = 0; + ThreadLocal* w = NULL; + int wVersion = 0; + const int wVersionMax = 2; + int thIter = 0; + std::mutex lock; + auto th = std::thread([&]() { + int wVersionPrev = 0; + while (true) { + while (true) { + std::lock_guard g(lock); + if (wVersion > wVersionMax) { + return; + } + if (wVersion > wVersionPrev) { + // We have a new version of w, so it should be initialized to zero + EXPECT_EQ((*w)->val_, 0); + break; + } + } + std::lock_guard g(lock); + wVersionPrev = wVersion; + (*w)->val_ += 10; + ++thIter; + } + }); + FOR_EACH_RANGE(i, 0, wVersionMax) { + int thIterPrev = 0; + { + std::lock_guard g(lock); + thIterPrev = thIter; + delete w; + w = new ThreadLocal(); + ++wVersion; + } + while (true) { + std::lock_guard g(lock); + if (thIter > thIterPrev) { + break; + } + } + } + { + std::lock_guard g(lock); + wVersion = wVersionMax + 1; + } + th.join(); + EXPECT_EQ(wVersionMax * 10, Widget::totalVal_); +} + +class SimpleThreadCachedInt { + + class NewTag; + ThreadLocal val_; + + public: + void add(int val) { + *val_ += val; + } + + int read() { + int ret = 0; + for (const auto& i : val_.accessAllThreads()) { + ret += i; + } + return ret; + } +}; + +TEST(ThreadLocalPtr, AccessAllThreadsCounter) { + const int kNumThreads = 10; + SimpleThreadCachedInt stci; + std::atomic run(true); + std::atomic totalAtomic(0); + std::vector threads; + for (int i = 0; i < kNumThreads; ++i) { + threads.push_back(std::thread([&,i]() { + stci.add(1); + totalAtomic.fetch_add(1); + while (run.load()) { usleep(100); } + })); + } + while (totalAtomic.load() != kNumThreads) { usleep(100); } + EXPECT_EQ(kNumThreads, stci.read()); + run.store(false); + for (auto& t : threads) { + t.join(); + } +} + +TEST(ThreadLocal, resetNull) { + ThreadLocal tl; + tl.reset(new int(4)); + EXPECT_EQ(*tl.get(), 4); + tl.reset(); + EXPECT_EQ(*tl.get(), 0); + tl.reset(new int(5)); + EXPECT_EQ(*tl.get(), 5); +} + +namespace { +struct Tag {}; + +struct Foo { + folly::ThreadLocal tl; +}; +} // namespace + +TEST(ThreadLocal, Movable1) { + Foo a; + Foo b; + EXPECT_TRUE(a.tl.get() != b.tl.get()); + + a = Foo(); + b = Foo(); + EXPECT_TRUE(a.tl.get() != b.tl.get()); +} + +TEST(ThreadLocal, Movable2) { + std::map map; + + map[42]; + map[10]; + map[23]; + map[100]; + + std::set tls; + for (auto& m : map) { + tls.insert(m.second.tl.get()); + } + + // Make sure that we have 4 different instances of *tl + EXPECT_EQ(4, tls.size()); +} + +// Simple reference implementation using pthread_get_specific +template +class PThreadGetSpecific { + public: + PThreadGetSpecific() : key_(0) { + pthread_key_create(&key_, OnThreadExit); + } + + T* get() const { + return static_cast(pthread_getspecific(key_)); + } + + void reset(T* t) { + delete get(); + pthread_setspecific(key_, t); + } + static void OnThreadExit(void* obj) { + delete static_cast(obj); + } + private: + pthread_key_t key_; +}; + +DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks."); + +#define REG(var) \ + BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) { \ + const int itersPerThread = 
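// SimpleThreadCachedInt above combines the two ThreadLocal features these
// tests lean on: a private tag type (NewTag) gives it its own thread-local
// registry, and accessAllThreads() lets one reader fold together every
// thread's slot. Usage sketch:
//
//   SimpleThreadCachedInt counter;
//   // in worker threads:   counter.add(1);
//   // in the aggregator:   int total = counter.read();  // sums all slots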
iters / FLAGS_numThreads; \ + std::vector threads; \ + for (int i = 0; i < FLAGS_numThreads; ++i) { \ + threads.push_back(std::thread([&]() { \ + var.reset(new int(0)); \ + for (int i = 0; i < itersPerThread; ++i) { \ + ++(*var.get()); \ + } \ + })); \ + } \ + for (auto& t : threads) { \ + t.join(); \ + } \ + } + +ThreadLocalPtr tlp; +REG(tlp); +PThreadGetSpecific pthread_get_specific; +REG(pthread_get_specific); +boost::thread_specific_ptr boost_tsp; +REG(boost_tsp); +BENCHMARK_DRAW_LINE(); + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + google::SetCommandLineOptionWithMode( + "bm_max_iters", "100000000", google::SET_FLAG_IF_DEFAULT + ); + if (FLAGS_benchmark) { + folly::runBenchmarks(); + } + return RUN_ALL_TESTS(); +} + +/* +Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches + +Benchmark Iters Total t t/iter iter/sec +------------------------------------------------------------------------------ +* BM_mt_tlp 100000000 39.88 ms 398.8 ps 2.335 G + +5.91% BM_mt_pthread_get_specific 100000000 42.23 ms 422.3 ps 2.205 G + + 295% BM_mt_boost_tsp 100000000 157.8 ms 1.578 ns 604.5 M +------------------------------------------------------------------------------ +*/ diff --git a/folly/test/TimeoutQueueTest.cpp b/folly/test/TimeoutQueueTest.cpp new file mode 100644 index 00000000..01e63a13 --- /dev/null +++ b/folly/test/TimeoutQueueTest.cpp @@ -0,0 +1,113 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "folly/TimeoutQueue.h" + +using namespace folly; + +TEST(TimeoutQueue, Simple) { + typedef std::vector EventVec; + EventVec events; + + TimeoutQueue q; + TimeoutQueue::Callback cb = + [&events](TimeoutQueue::Id id, int64_t now) { + events.push_back(id); + }; + + EXPECT_EQ(1, q.add(0, 10, cb)); + EXPECT_EQ(2, q.add(0, 11, cb)); + EXPECT_EQ(3, q.addRepeating(0, 9, cb)); + + EXPECT_TRUE(events.empty()); + EXPECT_EQ(21, q.runOnce(12)); // now+9 + + bool r = (EventVec{3,1,2} == events); + EXPECT_TRUE(r); + + events.clear(); + EXPECT_EQ(49, q.runOnce(40)); + r = (EventVec{3} == events); + EXPECT_TRUE(r); +} + +TEST(TimeoutQueue, Erase) { + typedef std::vector EventVec; + EventVec events; + + TimeoutQueue q; + TimeoutQueue::Callback cb = + [&events, &q](TimeoutQueue::Id id, int64_t now) { + events.push_back(id); + if (id == 2) { + q.erase(1); + } + }; + + EXPECT_EQ(1, q.addRepeating(0, 10, cb)); + EXPECT_EQ(2, q.add(0, 35, cb)); + + int64_t now = 0; + while (now < std::numeric_limits::max()) { + now = q.runOnce(now); + } + + bool r = (EventVec{1,1,1,2} == events); + EXPECT_TRUE(r); +} + +TEST(TimeoutQueue, RunOnceRepeating) { + int count = 0; + TimeoutQueue q; + TimeoutQueue::Callback cb = + [&count, &q](TimeoutQueue::Id id, int64_t now) { + if (++count == 100) { + EXPECT_TRUE(q.erase(id)); + } + }; + + EXPECT_EQ(1, q.addRepeating(0, 0, cb)); + + EXPECT_EQ(0, q.runOnce(0)); + EXPECT_EQ(1, count); + EXPECT_EQ(0, q.runOnce(0)); + EXPECT_EQ(2, count); + EXPECT_EQ(std::numeric_limits::max(), q.runLoop(0)); + EXPECT_EQ(100, count); +} + +TEST(TimeoutQueue, RunOnceReschedule) { + int count = 0; + TimeoutQueue q; + TimeoutQueue::Callback cb = + [&count, &q, &cb](TimeoutQueue::Id id, int64_t now) { + if (++count < 100) { + EXPECT_LT(id, q.add(now, 0, cb)); + } + }; + + EXPECT_EQ(1, q.add(0, 0, cb)); + + int64_t now = 0; + EXPECT_EQ(0, q.runOnce(0)); + EXPECT_EQ(1, count); + EXPECT_EQ(0, q.runOnce(0)); + EXPECT_EQ(2, count); + EXPECT_EQ(std::numeric_limits::max(), q.runLoop(0)); + EXPECT_EQ(100, count); +} + diff --git a/folly/test/function_benchmark/Makefile.am b/folly/test/function_benchmark/Makefile.am new file mode 100644 index 00000000..c363fc27 --- /dev/null +++ b/folly/test/function_benchmark/Makefile.am @@ -0,0 +1,11 @@ +ACLOCAL_AMFLAGS = -I m4 + +# depends on libfollybenchmark + +# TESTS = function_benchmark + +# check_PROGRAMS = $(TESTS) + +# noinst_HEADERS = test_functions.h benchmark_impl.h + +# function_benchmark_SOURCES = benchmark_impl.cpp main.cpp test_functions.cpp diff --git a/folly/test/function_benchmark/benchmark_impl.cpp b/folly/test/function_benchmark/benchmark_impl.cpp new file mode 100644 index 00000000..9dceb495 --- /dev/null +++ b/folly/test/function_benchmark/benchmark_impl.cpp @@ -0,0 +1,36 @@ +// Copyright 2004-present Facebook. All rights reserved. +#include "folly/test/function_benchmark/benchmark_impl.h" + +#include "folly/test/function_benchmark/test_functions.h" + +/* + * These functions are defined in a separate file so that gcc won't be able to + * inline them and optimize away the indirect calls. 
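// A sketch of the TimeoutQueue usage pattern the tests above establish: the
// queue never blocks and never reads a clock; the caller supplies "now" and
// drives expiration explicitly:
//
//   folly::TimeoutQueue q;
//   auto id = q.add(/* now = */ 0, /* delay = */ 10,
//                   [](folly::TimeoutQueue::Id id, int64_t now) {
//                     // one-shot: fires on the first run with now >= 10
//                   });
//   int64_t next = q.runOnce(12);  // runs due callbacks, returns the next
//                                  // expiration time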
+ */ + +void BM_fn_ptr_invoke_impl(int iters, void (*fn)()) { + for (int n = 0; n < iters; ++n) { + fn(); + } +} + +void BM_std_function_invoke_impl(int iters, + const std::function& fn) { + for (int n = 0; n < iters; ++n) { + fn(); + } +} + +void BM_mem_fn_invoke_impl(int iters, + TestClass* tc, + void (TestClass::*memfn)()) { + for (int n = 0; n < iters; ++n) { + (tc->*memfn)(); + } +} + +void BM_virtual_fn_invoke_impl(int iters, VirtualClass* vc) { + for (int n = 0; n < iters; ++n) { + vc->doNothing(); + } +} diff --git a/folly/test/function_benchmark/benchmark_impl.h b/folly/test/function_benchmark/benchmark_impl.h new file mode 100644 index 00000000..b6eb40ce --- /dev/null +++ b/folly/test/function_benchmark/benchmark_impl.h @@ -0,0 +1,35 @@ +// Copyright 2004-present Facebook. All rights reserved. +#ifndef BENCHMARK_IMPL_H_ +#define BENCHMARK_IMPL_H_ + +#include + +class TestClass; +class VirtualClass; + +void BM_fn_ptr_invoke_impl(int iters, void (*fn)()); +void BM_std_function_invoke_impl(int iters, const std::function& fn); +void BM_mem_fn_invoke_impl(int iters, + TestClass* tc, + void (TestClass::*memfn)()); +void BM_virtual_fn_invoke_impl(int iters, VirtualClass* vc); + +// Inlined version of BM_fn_ptr_invoke_impl(). +// The compiler could potentially even optimize the call to the function +// pointer if it is a constexpr. +inline void BM_fn_ptr_invoke_inlined_impl(int iters, void (*fn)()) { + for (int n = 0; n < iters; ++n) { + fn(); + } +} + +// Invoke a function object as a template parameter. +// This can be used to directly invoke lambda functions +template +void BM_invoke_fn_template_impl(int iters, const T& fn) { + for (int n = 0; n < iters; ++n) { + fn(); + } +} + +#endif // BENCHMARK_IMPL_H_ diff --git a/folly/test/function_benchmark/main.cpp b/folly/test/function_benchmark/main.cpp new file mode 100644 index 00000000..33610c9d --- /dev/null +++ b/folly/test/function_benchmark/main.cpp @@ -0,0 +1,196 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
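// The design point of BM_invoke_fn_template_impl in the header above: the
// callable's concrete type is a template parameter, so a lambda passed in
// can be inlined into the loop -- unlike the function-pointer and
// std::function variants, which are deliberately kept in this separate
// translation unit so the compiler cannot see through the indirect call:
//
//   BM_invoke_fn_template_impl(1000000, [] { doNothing(); });  // inlinable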
+ */ + +#include "folly/test/function_benchmark/benchmark_impl.h" +#include "folly/test/function_benchmark/test_functions.h" + +#include "folly/Benchmark.h" +#include "folly/ScopeGuard.h" +#include +#include + +using folly::ScopeGuard; +using folly::makeGuard; + +// Declare the bm_max_iters flag from folly/Benchmark.cpp +DECLARE_int32(bm_max_iters); + +// Directly invoking a function +BENCHMARK(fn_invoke, iters) { + for (int n = 0; n < iters; ++n) { + doNothing(); + } +} + +// Invoking a function through a function pointer +BENCHMARK(fn_ptr_invoke, iters) { + BM_fn_ptr_invoke_impl(iters, doNothing); +} + +// Invoking a function through a std::function object +BENCHMARK(std_function_invoke, iters) { + BM_std_function_invoke_impl(iters, doNothing); +} + +// Invoking a member function through a member function pointer +BENCHMARK(mem_fn_invoke, iters) { + TestClass tc; + BM_mem_fn_invoke_impl(iters, &tc, &TestClass::doNothing); +} + +// Invoke a function pointer through an inlined wrapper function +BENCHMARK(fn_ptr_invoke_through_inline, iters) { + BM_fn_ptr_invoke_inlined_impl(iters, doNothing); +} + +// Invoke a lambda that calls doNothing() through an inlined wrapper function +BENCHMARK(lambda_invoke_fn, iters) { + BM_invoke_fn_template_impl(iters, [] { doNothing(); }); +} + +// Invoke a lambda that does nothing +BENCHMARK(lambda_noop, iters) { + BM_invoke_fn_template_impl(iters, [] {}); +} + +// Invoke a lambda that modifies a local variable +BENCHMARK(lambda_local_var, iters) { + uint32_t count1 = 0; + uint32_t count2 = 0; + BM_invoke_fn_template_impl(iters, [&] { + // Do something slightly more complicated than just incrementing a + // variable. Otherwise gcc is smart enough to optimize the loop away. + if (count1 & 0x1) { + ++count2; + } + ++count1; + }); + + // Use the values we computed, so gcc won't optimize the loop away + CHECK_EQ(iters, count1); + CHECK_EQ(iters / 2, count2); +} + +// Invoke a function pointer through the same wrapper used for lambdas +BENCHMARK(fn_ptr_invoke_through_template, iters) { + BM_invoke_fn_template_impl(iters, doNothing); +} + +// Invoking a virtual method +BENCHMARK(virtual_fn_invoke, iters) { + VirtualClass vc; + BM_virtual_fn_invoke_impl(iters, &vc); +} + +// Creating a function pointer and invoking it +BENCHMARK(fn_ptr_create_invoke, iters) { + for (int n = 0; n < iters; ++n) { + void (*fn)() = doNothing; + fn(); + } +} + +// Creating a std::function object from a function pointer, and invoking it +BENCHMARK(std_function_create_invoke, iters) { + for (int n = 0; n < iters; ++n) { + std::function fn = doNothing; + fn(); + } +} + +// Creating a pointer-to-member and invoking it +BENCHMARK(mem_fn_create_invoke, iters) { + TestClass tc; + for (int n = 0; n < iters; ++n) { + void (TestClass::*memfn)() = &TestClass::doNothing; + (tc.*memfn)(); + } +} + +// Using std::bind to create a std::function from a member function, +// and invoking it +BENCHMARK(std_bind_create_invoke, iters) { + TestClass tc; + for (int n = 0; n < iters; ++n) { + std::function fn = std::bind(&TestClass::doNothing, &tc); + fn(); + } +} + +// Using ScopeGuard to invoke a std::function +BENCHMARK(scope_guard_std_function, iters) { + std::function fn(doNothing); + for (int n = 0; n < iters; ++n) { + ScopeGuard g = makeGuard(fn); + } +} + +// Using ScopeGuard to invoke a std::function, +// but create the ScopeGuard with an rvalue to a std::function +BENCHMARK(scope_guard_std_function_rvalue, iters) { + for (int n = 0; n < iters; ++n) { + ScopeGuard g = 
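// The idiom the scope_guard_* benchmarks here measure: makeGuard() defers a
// callable to scope exit, and its cost tracks the kind of callable wrapped
// (function pointer vs std::function vs lambda):
//
//   {
//     folly::ScopeGuard g = folly::makeGuard([] { doNothing(); });
//     // ... work that might throw or return early ...
//   }  // <- doNothing() runs here, whatever the exit path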
makeGuard(std::function(doNothing)); + } +} + +// Using ScopeGuard to invoke a function pointer +BENCHMARK(scope_guard_fn_ptr, iters) { + for (int n = 0; n < iters; ++n) { + ScopeGuard g = makeGuard(doNothing); + } +} + +// Using ScopeGuard to invoke a lambda that does nothing +BENCHMARK(scope_guard_lambda_noop, iters) { + for (int n = 0; n < iters; ++n) { + ScopeGuard g = makeGuard([] {}); + } +} + +// Using ScopeGuard to invoke a lambda that invokes a function +BENCHMARK(scope_guard_lambda_function, iters) { + for (int n = 0; n < iters; ++n) { + ScopeGuard g = makeGuard([] { doNothing(); }); + } +} + +// Using ScopeGuard to invoke a lambda that modifies a local variable +BENCHMARK(scope_guard_lambda_local_var, iters) { + uint32_t count = 0; + for (int n = 0; n < iters; ++n) { + ScopeGuard g = makeGuard([&] { + // Increment count if n is odd. Without this conditional check + // (i.e., if we just increment count each time through the loop), + // gcc is smart enough to optimize the entire loop away, and just set + // count = iters. + if (n & 0x1) { + ++count; + } + }); + } + + // Check that the value of count is what we expect. + // This check is necessary: if we don't use count, gcc detects that count is + // unused and optimizes the entire loop away. + CHECK_EQ(iters / 2, count); +} + +// main() + +int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + folly::runBenchmarks(); +} diff --git a/folly/test/function_benchmark/test_functions.cpp b/folly/test/function_benchmark/test_functions.cpp new file mode 100644 index 00000000..932ac59b --- /dev/null +++ b/folly/test/function_benchmark/test_functions.cpp @@ -0,0 +1,19 @@ +// Copyright 2004-present Facebook. All rights reserved. +#include "folly/test/function_benchmark/test_functions.h" + +/* + * These functions are defined in a separate file so that + * gcc won't be able to inline them. + */ + +void doNothing() { +} + +void TestClass::doNothing() { +} + +VirtualClass::~VirtualClass() { +} + +void VirtualClass::doNothing() { +}; diff --git a/folly/test/function_benchmark/test_functions.h b/folly/test/function_benchmark/test_functions.h new file mode 100644 index 00000000..586d3238 --- /dev/null +++ b/folly/test/function_benchmark/test_functions.h @@ -0,0 +1,18 @@ +// Copyright 2004-present Facebook. All rights reserved. +#ifndef TEST_FUNCTIONS_H_ +#define TEST_FUNCTIONS_H_ + +void doNothing(); + +class TestClass { + public: + void doNothing(); +}; + +class VirtualClass { + public: + virtual ~VirtualClass(); + virtual void doNothing(); +}; + +#endif // TEST_FUNCTIONS_H_ diff --git a/folly/test/small_vector_test.cpp b/folly/test/small_vector_test.cpp new file mode 100644 index 00000000..18cb57bf --- /dev/null +++ b/folly/test/small_vector_test.cpp @@ -0,0 +1,752 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/small_vector.h" + +#include +#include +#include +#include +#include + +#include + +#include "folly/Conv.h" + +using folly::small_vector; +using namespace folly::small_vector_policy; + +#if defined(__x86_64__) + +static_assert(sizeof(small_vector) == 16, + "Object size is not what we expect for small_vector"); +static_assert(sizeof(small_vector) == 16, + "Object size is not what we expect for " + "small_vector"); +static_assert(sizeof(small_vector) == + 10 * sizeof(int) + sizeof(std::size_t), + "Object size is not what we expect for small_vector"); + +static_assert(sizeof(small_vector) == + 8 + 4, + "small_vector is wrong size"); +static_assert(sizeof(small_vector) == + 8 + 2, + "small_vector is wrong size"); +static_assert(sizeof(small_vector) == + 8 + 1, + "small_vector is wrong size"); + +static_assert(sizeof(small_vector) == 16, + "OneBitMutex took more space than expected"); + +static_assert(sizeof(small_vector) == 10, + "Sizeof unexpectedly large"); +static_assert(sizeof(small_vector) == 10, + "Sizeof unexpectedly large"); +static_assert(sizeof(small_vector) == 10, + "Sizeof unexpectedly large"); + +#endif + +namespace { + +struct NontrivialType { + static int ctored; + explicit NontrivialType() : a(0) {} + + /* implicit */ NontrivialType(int a) : a(a) { + ++ctored; + } + + NontrivialType(NontrivialType const& s) { + ++ctored; + } + + NontrivialType& operator=(NontrivialType const& o) { + a = o.a; + return *this; + } + + int32_t a; +}; +static_assert(!boost::has_trivial_copy::value, + "NontrivialType isn't trivially copyable"); + +int NontrivialType::ctored = 0; + +struct TestException {}; + +int throwCounter = 1; +void MaybeThrow() { + if (!--throwCounter) { + throw TestException(); + } +} + +const int kMagic = 0xdeadbeef; +struct Thrower { + static int alive; + + Thrower() : magic(kMagic) { + EXPECT_EQ(magic, kMagic); + MaybeThrow(); + ++alive; + } + Thrower(Thrower const& other) : magic(other.magic) { + EXPECT_EQ(magic, kMagic); + MaybeThrow(); + ++alive; + } + ~Thrower() noexcept { + EXPECT_EQ(magic, kMagic); + magic = 0; + --alive; + } + + Thrower& operator=(Thrower const& other) { + EXPECT_EQ(magic, kMagic); + MaybeThrow(); + return *this; + } + + // This is just to try to make sure we don't get our member + // functions called on uninitialized memory. + int magic; +}; + +int Thrower::alive = 0; + +// Type that counts how many exist and doesn't support copy +// construction. +struct NoncopyableCounter { + static int alive; + NoncopyableCounter() { + ++alive; + } + ~NoncopyableCounter() { + --alive; + } + NoncopyableCounter(NoncopyableCounter&&) { ++alive; } + NoncopyableCounter(NoncopyableCounter const&) = delete; + NoncopyableCounter& operator=(NoncopyableCounter const&) const = delete; + NoncopyableCounter& operator=(NoncopyableCounter&&) { return *this; } +}; +int NoncopyableCounter::alive = 0; + +// Check that throws don't break the basic guarantee for some cases. +// Uses the method for testing exception safety described at +// http://www.boost.org/community/exception_safety.html, to force all +// throwing code paths to occur. 
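// The harness below drives that method with a throw-injection countdown:
// run the operation with throwCounter = 1 (the first MaybeThrow() fires),
// then 2, then 3, ... until one run completes cleanly, so every throwing
// path is exercised exactly once. In sketch form:
//
//   for (int counter = 1; !done; ++counter) {
//     throwCounter = counter;  // the counter-th MaybeThrow() will throw
//     try { op(vec); done = true; }
//     catch (...) { /* check the invariant, then retry */ }
//   }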
+struct TestBasicGuarantee { + folly::small_vector vec; + int const prepopulate; + + explicit TestBasicGuarantee(int prepopulate) + : prepopulate(prepopulate) + { + throwCounter = 1000; + for (int i = 0; i < prepopulate; ++i) { + vec.push_back(Thrower()); + } + } + + ~TestBasicGuarantee() { + throwCounter = 1000; + } + + template + void operator()(int insertCount, Operation const& op) { + bool done = false; + + std::unique_ptr > workingVec; + for (int counter = 1; !done; ++counter) { + throwCounter = 1000; + workingVec.reset(new folly::small_vector(vec)); + throwCounter = counter; + EXPECT_EQ(Thrower::alive, prepopulate * 2); + try { + op(*workingVec); + done = true; + } catch (...) { + // Note that the size of the vector can change if we were + // inserting somewhere other than the end (it's a basic only + // guarantee). All we're testing here is that we have the + // right amount of uninitialized vs initialized memory. + EXPECT_EQ(Thrower::alive, workingVec->size() + vec.size()); + continue; + } + + // If things succeeded. + EXPECT_EQ(workingVec->size(), prepopulate + insertCount); + EXPECT_EQ(Thrower::alive, prepopulate * 2 + insertCount); + } + } +}; + +} + +TEST(small_vector, BasicGuarantee) { + for (int prepop = 1; prepop < 30; ++prepop) { + (TestBasicGuarantee(prepop))( // parens or a mildly vexing parse :( + 1, + [&] (folly::small_vector& v) { + v.push_back(Thrower()); + } + ); + + EXPECT_EQ(Thrower::alive, 0); + + (TestBasicGuarantee(prepop))( + 1, + [&] (folly::small_vector& v) { + v.insert(v.begin(), Thrower()); + } + ); + + EXPECT_EQ(Thrower::alive, 0); + + (TestBasicGuarantee(prepop))( + 1, + [&] (folly::small_vector& v) { + v.insert(v.begin() + 1, Thrower()); + } + ); + + EXPECT_EQ(Thrower::alive, 0); + } + + TestBasicGuarantee(4)( + 3, + [&] (folly::small_vector& v) { + std::vector b; + b.push_back(Thrower()); + b.push_back(Thrower()); + b.push_back(Thrower()); + + /* + * Apparently if you do the following initializer_list instead + * of the above push_back's, and one of the Throwers throws, + * g++4.6 doesn't destruct the previous ones. Heh. + */ + //b = { Thrower(), Thrower(), Thrower() }; + v.insert(v.begin() + 1, b.begin(), b.end()); + } + ); + + TestBasicGuarantee(2)( + 6, + [&] (folly::small_vector& v) { + std::vector b; + for (int i = 0; i < 6; ++i) { + b.push_back(Thrower()); + } + + v.insert(v.begin() + 1, b.begin(), b.end()); + } + ); + + EXPECT_EQ(Thrower::alive, 0); + try { + throwCounter = 4; + folly::small_vector p(14, Thrower()); + } catch (...) { + } + EXPECT_EQ(Thrower::alive, 0); +} + +// Run this with. 
+// MALLOC_CONF=prof_leak:true +// LD_PRELOAD=${JEMALLOC_PATH}/lib/libjemalloc.so.1 +// LD_PRELOAD="$LD_PRELOAD:"${UNWIND_PATH}/lib/libunwind.so.7 +TEST(small_vector, leak_test) { + for (int j = 0; j < 1000; ++j) { + folly::small_vector someVec(300); + for (int i = 0; i < 10000; ++i) { + someVec.push_back(12); + } + } +} + +TEST(small_vector, Insert) { + folly::small_vector someVec(3, 3); + someVec.insert(someVec.begin(), 12, 12); + EXPECT_EQ(someVec.size(), 15); + for (int i = 0; i < someVec.size(); ++i) { + if (i < 12) { + EXPECT_EQ(someVec[i], 12); + } else { + EXPECT_EQ(someVec[i], 3); + } + } + + auto oldSize = someVec.size(); + someVec.insert(someVec.begin() + 1, 12, 12); + EXPECT_EQ(someVec.size(), oldSize + 12); + + folly::small_vector v1(6, "asd"), v2(7, "wat"); + v1.insert(v1.begin() + 1, v2.begin(), v2.end()); + EXPECT_TRUE(v1.size() == 6 + 7); + EXPECT_EQ(v1.front(), "asd"); + EXPECT_EQ(v1[1], "wat"); +} + +TEST(small_vector, Swap) { + folly::small_vector somethingVec, emptyVec; + somethingVec.push_back(1); + somethingVec.push_back(2); + somethingVec.push_back(3); + somethingVec.push_back(4); + + // Swapping intern'd with intern'd. + auto vec = somethingVec; + EXPECT_TRUE(vec == somethingVec); + EXPECT_FALSE(vec == emptyVec); + EXPECT_FALSE(somethingVec == emptyVec); + + // Swapping a heap vector with an intern vector. + folly::small_vector junkVec; + junkVec.assign(12, 12); + EXPECT_EQ(junkVec.size(), 12); + for (auto i : junkVec) { + EXPECT_EQ(i, 12); + } + swap(junkVec, vec); + EXPECT_TRUE(junkVec == somethingVec); + EXPECT_EQ(vec.size(), 12); + for (auto i : vec) { + EXPECT_EQ(i, 12); + } + + // Swapping two heap vectors. + folly::small_vector moreJunk(15, 15); + EXPECT_EQ(moreJunk.size(), 15); + for (auto i : moreJunk) { + EXPECT_EQ(i, 15); + } + swap(vec, moreJunk); + EXPECT_EQ(moreJunk.size(), 12); + for (auto i : moreJunk) { + EXPECT_EQ(i, 12); + } + EXPECT_EQ(vec.size(), 15); + for (auto i : vec) { + EXPECT_EQ(i, 15); + } + + // Making a vector heap, then smaller than another non-heap vector, + // then swapping. + folly::small_vector shrinker, other(4, 10); + shrinker = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + shrinker.erase(shrinker.begin() + 2, shrinker.end()); + EXPECT_LT(shrinker.size(), other.size()); + swap(shrinker, other); + EXPECT_EQ(shrinker.size(), 4); + EXPECT_TRUE(boost::all(shrinker, boost::is_any_of(std::vector{10}))); + EXPECT_TRUE((other == small_vector{ 0, 1 })); +} + +TEST(small_vector, Emplace) { + NontrivialType::ctored = 0; + + folly::small_vector vec; + vec.reserve(1024); + vec.emplace_back(12); + EXPECT_EQ(NontrivialType::ctored, 1); + EXPECT_EQ(vec.front().a, 12); + vec.emplace_back(13); + EXPECT_EQ(vec.front().a, 12); + EXPECT_EQ(vec.back().a, 13); + EXPECT_EQ(NontrivialType::ctored, 2); + + NontrivialType::ctored = 0; + for (int i = 0; i < 120; ++i) { + vec.emplace_back(i); + } + EXPECT_EQ(NontrivialType::ctored, 120); + EXPECT_EQ(vec[0].a, 12); + EXPECT_EQ(vec[1].a, 13); + EXPECT_EQ(vec.back().a, 119); + + // We implement emplace() with a temporary (see the implementation + // for a comment about why), so this should make 2 ctor calls. 
+ NontrivialType::ctored = 0; + vec.emplace(vec.begin(), 12); + EXPECT_EQ(NontrivialType::ctored, 2); +} + +TEST(small_vector, Erase) { + folly::small_vector notherVec = { 1, 2, 3, 4, 5 }; + EXPECT_EQ(notherVec.front(), 1); + EXPECT_EQ(notherVec.size(), 5); + notherVec.erase(notherVec.begin()); + EXPECT_EQ(notherVec.front(), 2); + EXPECT_EQ(notherVec.size(), 4); + EXPECT_EQ(notherVec[2], 4); + EXPECT_EQ(notherVec[3], 5); + notherVec.erase(notherVec.begin() + 2); + EXPECT_EQ(notherVec.size(), 3); + EXPECT_EQ(notherVec[2], 5); + + folly::small_vector vec2 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + vec2.erase(vec2.begin() + 1, vec2.end() - 1); + folly::small_vector expected = { 1, 10 }; + EXPECT_TRUE(vec2 == expected); + + folly::small_vector v(102, "ASD"); + v.resize(1024, "D"); + EXPECT_EQ(v.size(), 1024); + EXPECT_EQ(v.back(), "D"); + EXPECT_EQ(v.front(), "ASD"); + v.resize(1); + EXPECT_EQ(v.front(), "ASD"); + EXPECT_EQ(v.size(), 1); + v.resize(0); + EXPECT_TRUE(v.empty()); +} + +TEST(small_vector, GrowShrinkGrow) { + folly::small_vector vec = { 1, 2, 3, 4, 5 }; + std::generate_n(std::back_inserter(vec), 102, std::rand); + + auto capacity = vec.capacity(); + + auto oldSize = vec.size(); + for (int i = 0; i < oldSize; ++i) { + vec.erase(vec.begin() + (std::rand() % vec.size())); + EXPECT_EQ(vec.capacity(), capacity); + } + EXPECT_TRUE(vec.empty()); + + EXPECT_EQ(vec.capacity(), capacity); + std::generate_n(std::back_inserter(vec), 102, std::rand); + EXPECT_EQ(vec.capacity(), capacity); + + std::generate_n(std::back_inserter(vec), 4096, std::rand); + EXPECT_GT(vec.capacity(), capacity); + + vec.resize(10); + vec.shrink_to_fit(); + EXPECT_LT(vec.capacity(), capacity); + vec.resize(4); + vec.shrink_to_fit(); + EXPECT_EQ(vec.capacity(), 7); // in situ size +} + +TEST(small_vector, Iteration) { + folly::small_vector vec = { "foo", "bar" }; + vec.push_back("blah"); + vec.push_back("blah2"); + vec.push_back("blah3"); + vec.erase(vec.begin() + 2); + + std::vector otherVec; + for (auto& s : vec) { + otherVec.push_back(s); + } + EXPECT_EQ(otherVec.size(), vec.size()); + if (otherVec.size() == vec.size()) { + EXPECT_TRUE(std::equal(otherVec.begin(), otherVec.end(), vec.begin())); + } + + std::reverse(otherVec.begin(), otherVec.end()); + auto oit = otherVec.begin(); + auto rit = vec.crbegin(); + for (; rit != vec.crend(); ++oit, ++rit) { + EXPECT_EQ(*oit, *rit); + } +} + +TEST(small_vector, NonCopyableType) { + folly::small_vector,2> vec; + for (int i = 0; i < 10; ++i) { + vec.emplace(vec.begin(), new std::string("asd")); + } + EXPECT_EQ(vec.size(), 10); + auto vec2 = std::move(vec); + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec2.size(), 10); + vec2.clear(); + + folly::small_vector vec3; + for (int i = 0; i < 10; ++i) { + EXPECT_EQ(vec3.size(), i); + EXPECT_EQ(NoncopyableCounter::alive, i); + vec3.insert(vec3.begin(), NoncopyableCounter()); + } + EXPECT_EQ(vec3.size(), 10); + EXPECT_EQ(NoncopyableCounter::alive, 10); + + vec3.insert(vec3.begin() + 3, NoncopyableCounter()); + EXPECT_EQ(NoncopyableCounter::alive, 11); + auto vec4 = std::move(vec3); + EXPECT_EQ(NoncopyableCounter::alive, 11); + vec4.resize(30); + EXPECT_EQ(NoncopyableCounter::alive, 30); + vec4.erase(vec4.begin(), vec4.end()); + EXPECT_EQ(vec4.size(), 0); + EXPECT_EQ(NoncopyableCounter::alive, 0); +} + +TEST(small_vector, MoveConstructor) { + folly::small_vector v1; + v1.push_back("asd"); + v1.push_back("bsd"); + auto v2 = std::move(v1); + EXPECT_EQ(v2.size(), 2); + EXPECT_EQ(v2[0], "asd"); + EXPECT_EQ(v2[1], "bsd"); + + v1 = std::move(v2); + 
EXPECT_EQ(v1.size(), 2); + EXPECT_EQ(v1[0], "asd"); + EXPECT_EQ(v1[1], "bsd"); +} + +TEST(small_vector, NoHeap) { + typedef folly::small_vector Vector; + + Vector v; + EXPECT_EQ(v.max_size(), 10); + + for (int i = 0; i < 10; ++i) { + v.push_back(folly::to(i)); + EXPECT_EQ(v.size(), i + 1); + } + + bool caught = false; + try { + v.insert(v.begin(), "ha"); + } catch (const std::length_error&) { + caught = true; + } + EXPECT_TRUE(caught); + + // Check max_size works right with various policy combinations. + folly::small_vector v2; + EXPECT_EQ(v2.max_size(), 32); + folly::small_vector v3; + EXPECT_EQ(v3.max_size(), (1ul << 30) - 1); + folly::small_vector v4; + EXPECT_EQ(v4.max_size(), (1ul << 31) - 1); + + /* + * Test that even when we ask for a small number inlined it'll still + * inline at least as much as it takes to store the value_type + * pointer. + */ + folly::small_vector notsosmall; + EXPECT_EQ(notsosmall.max_size(), sizeof(char*)); + caught = false; + try { + notsosmall.push_back(12); + notsosmall.push_back(13); + notsosmall.push_back(14); + } catch (const std::length_error&) { + caught = true; + } + EXPECT_FALSE(caught); +} + +TEST(small_vector, MaxSize) { + folly::small_vector vec; + EXPECT_EQ(vec.max_size(), 127); + folly::small_vector vec2; + EXPECT_EQ(vec2.max_size(), (1 << 15) - 1); + folly::small_vector vec3; + EXPECT_EQ(vec3.max_size(), (1 << 14) - 1); +} + +TEST(small_vector, AllHeap) { + // Use something bigger than the pointer so it can't get inlined. + struct SomeObj { + double a, b, c, d, e; int val; + SomeObj(int val) : val(val) {} + bool operator==(SomeObj const& o) const { + return o.val == val; + } + }; + + folly::small_vector vec = { 1 }; + EXPECT_EQ(vec.size(), 1); + if (!vec.empty()) { + EXPECT_TRUE(vec[0] == 1); + } + vec.insert(vec.begin(), { 0, 1, 2, 3 }); + EXPECT_EQ(vec.size(), 5); + EXPECT_TRUE((vec == folly::small_vector{ 0, 1, 2, 3, 1 })); +} + +TEST(small_vector, Basic) { + typedef folly::small_vector Vector; + + Vector a; + +#ifdef __x86_64__ + a.lock(); + a.unlock(); +#endif + + a.push_back(12); + EXPECT_EQ(a.front(), 12); + EXPECT_EQ(a.size(), 1); + a.push_back(13); + EXPECT_EQ(a.size(), 2); + EXPECT_EQ(a.front(), 12); + EXPECT_EQ(a.back(), 13); + + a.emplace(a.end(), 32); + EXPECT_EQ(a.back(), 32); + + a.emplace(a.begin(), 12); + EXPECT_EQ(a.front(), 12); + EXPECT_EQ(a.back(), 32); + a.erase(a.end() - 1); + EXPECT_EQ(a.back(), 13); + + a.push_back(12); + EXPECT_EQ(a.back(), 12); + a.pop_back(); + EXPECT_EQ(a.back(), 13); + + const int s = 12; + a.push_back(s); // lvalue reference + + Vector b, c; + b = a; + EXPECT_TRUE(b == a); + c = std::move(b); + EXPECT_TRUE(c == a); + EXPECT_TRUE(c != b && b != a); + + EXPECT_GT(c.size(), 0); + c.resize(1); + EXPECT_EQ(c.size(), 1); + + Vector intCtor(12); +} + +TEST(small_vector, Capacity) { + folly::small_vector vec; + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec.capacity(), 1); + + vec.push_back(0); + EXPECT_EQ(vec.size(), 1); + EXPECT_EQ(vec.capacity(), 1); + + vec.push_back(1); + EXPECT_EQ(vec.size(), 2); + EXPECT_GT(vec.capacity(), 1); + + + folly::small_vector vec2; + EXPECT_EQ(vec2.size(), 0); + EXPECT_EQ(vec2.capacity(), 2); + + vec2.push_back(0); + vec2.push_back(1); + EXPECT_EQ(vec2.size(), 2); + EXPECT_EQ(vec2.capacity(), 2); + + vec2.push_back(2); + EXPECT_EQ(vec2.size(), 3); + EXPECT_GT(vec2.capacity(), 2); + + // Both have grown by the minimum amount + EXPECT_EQ(vec.capacity(), vec2.capacity()); + + // Test capacity heapifying logic + folly::small_vector vec3; + const size_t hc_size = 1000000; + for 
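// The policy knobs the NoHeap and MaxSize tests pin down: NoHeap caps the
// vector at its inline capacity and makes overflowing inserts throw
// std::length_error, while an explicit integral size type bounds max_size()
// (the 8-bit flavor reports 127 above, so apparently one bit of the size
// type is reserved for bookkeeping):
//
//   folly::small_vector<int, 10, folly::small_vector_policy::NoHeap> fixed;
//   folly::small_vector<int, 2, uint8_t> tiny;  // max_size() == 127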
(size_t i = 0; i < hc_size; ++i) { + auto v = (unsigned char)i; + vec3.push_back(v); + EXPECT_EQ(vec3[i], v); + EXPECT_EQ(vec3.size(), i + 1); + EXPECT_GT(vec3.capacity(), i); + } + for (auto i = hc_size; i > 0; --i) { + auto v = (unsigned char)(i - 1); + EXPECT_EQ(vec3.back(), v); + vec3.pop_back(); + EXPECT_EQ(vec3.size(), i - 1); + } +} + +TEST(small_vector, SelfPushBack) { + for (int i = 1; i < 33; ++i) { + folly::small_vector vec; + for (int j = 0; j < i; ++j) { + vec.push_back("abc"); + } + EXPECT_EQ(vec.size(), i); + vec.push_back(std::move(vec[0])); + EXPECT_EQ(vec.size(), i + 1); + + EXPECT_EQ(vec[i], "abc"); + } +} + +TEST(small_vector, SelfEmplaceBack) { + for (int i = 1; i < 33; ++i) { + folly::small_vector vec; + for (int j = 0; j < i; ++j) { + vec.emplace_back("abc"); + } + EXPECT_EQ(vec.size(), i); + vec.emplace_back(std::move(vec[0])); + EXPECT_EQ(vec.size(), i + 1); + + EXPECT_EQ(vec[i], "abc"); + } +} + +TEST(small_vector, SelfInsert) { + // end insert + for (int i = 1; i < 33; ++i) { + folly::small_vector vec; + for (int j = 0; j < i; ++j) { + vec.push_back("abc"); + } + EXPECT_EQ(vec.size(), i); + vec.insert(vec.end(), std::move(vec[0])); + EXPECT_EQ(vec.size(), i + 1); + + EXPECT_EQ(vec[i], "abc"); + EXPECT_EQ(vec[vec.size() - 1], "abc"); + } + + // middle insert + for (int i = 2; i < 33; ++i) { + folly::small_vector vec; + for (int j = 0; j < i; ++j) { + vec.push_back("abc"); + } + EXPECT_EQ(vec.size(), i); + vec.insert(vec.end()-1, std::move(vec[0])); + EXPECT_EQ(vec.size(), i + 1); + + EXPECT_EQ(vec[i-1], "abc"); + EXPECT_EQ(vec[i], "abc"); + } +} diff --git a/folly/test/sorted_vector_test.cpp b/folly/test/sorted_vector_test.cpp new file mode 100644 index 00000000..c5e6e43f --- /dev/null +++ b/folly/test/sorted_vector_test.cpp @@ -0,0 +1,273 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
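// What the SelfPushBack/SelfEmplaceBack/SelfInsert tests above pin down:
// inserting an element of the vector into itself must stay correct even
// when the call triggers a reallocation, which would otherwise invalidate
// the argument reference mid-operation:
//
//   folly::small_vector<std::string, 3> v(3, "abc");
//   v.push_back(std::move(v[0]));  // must not touch v[0] after reallocating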
+ */ + +#include "folly/sorted_vector_types.h" +#include +#include + +using folly::sorted_vector_set; +using folly::sorted_vector_map; + +namespace { + +template +struct less_invert : std::binary_function { + bool operator()(const T& a, const T& b) const { + return b < a; + } +}; + +template +void check_invariant(Container& c) { + auto it = c.begin(); + auto end = c.end(); + if (it == end) + return; + auto prev = it; + ++it; + for (; it != end; ++it, ++prev) { + EXPECT_TRUE(c.value_comp()(*prev, *it)); + } +} + +struct OneAtATimePolicy { + template + void increase_capacity(Container& c) { + if (c.size() == c.capacity()) { + c.reserve(c.size() + 1); + } + } +}; + +struct CountCopyCtor { + explicit CountCopyCtor() : val_(0) {} + + explicit CountCopyCtor(int val) : val_(val), count_(0) {} + + CountCopyCtor(const CountCopyCtor& c) + : val_(c.val_) + , count_(c.count_ + 1) + {} + + bool operator<(const CountCopyCtor& o) const { + return val_ < o.val_; + } + + int val_; + int count_; +}; + +} + +TEST(SortedVectorTypes, SimpleSetTest) { + sorted_vector_set s; + EXPECT_TRUE(s.empty()); + for (int i = 0; i < 1000; ++i) { + s.insert(rand() % 100000); + } + EXPECT_FALSE(s.empty()); + check_invariant(s); + + sorted_vector_set s2; + s2.insert(s.begin(), s.end()); + check_invariant(s2); + EXPECT_TRUE(s == s2); + + auto it = s2.lower_bound(32); + if (*it == 32) { + s2.erase(it); + it = s2.lower_bound(32); + } + check_invariant(s2); + auto oldSz = s2.size(); + s2.insert(it, 32); + EXPECT_TRUE(s2.size() == oldSz + 1); + check_invariant(s2); + + const sorted_vector_set& cs2 = s2; + auto range = cs2.equal_range(32); + auto lbound = cs2.lower_bound(32); + auto ubound = cs2.upper_bound(32); + EXPECT_TRUE(range.first == lbound); + EXPECT_TRUE(range.second == ubound); + EXPECT_TRUE(range.first != cs2.end()); + EXPECT_TRUE(range.second != cs2.end()); + EXPECT_TRUE(cs2.count(32) == 1); + EXPECT_FALSE(cs2.find(32) == cs2.end()); + + // Bad insert hint. 
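// The shape of the containers under test: sorted_vector_set and
// sorted_vector_map keep their elements in a sorted contiguous vector, so
// lookups are binary searches over cache-friendly storage while inserts are
// O(n). A hint iterator -- the lower_bound hint used above, or the
// deliberately bad hint exercised just below -- should only affect speed,
// never correctness:
//
//   folly::sorted_vector_set<int> s;
//   s.insert(3); s.insert(1); s.insert(2);  // stored as {1, 2, 3}
//   s.insert(s.lower_bound(4), 4);          // good hint: cheap append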
+  s2.insert(s2.begin() + 3, 33);
+  EXPECT_TRUE(s2.find(33) != s2.begin());
+  EXPECT_TRUE(s2.find(33) != s2.end());
+  check_invariant(s2);
+  s2.erase(33);
+  check_invariant(s2);
+
+  it = s2.find(32);
+  EXPECT_FALSE(it == s2.end());
+  s2.erase(it);
+  EXPECT_TRUE(s2.size() == oldSz);
+  check_invariant(s2);
+
+  sorted_vector_set<int> cpy(s);
+  check_invariant(cpy);
+  EXPECT_TRUE(cpy == s);
+  sorted_vector_set<int> cpy2(s);
+  cpy2.insert(100001);
+  EXPECT_TRUE(cpy2 != cpy);
+  EXPECT_TRUE(cpy2 != s);
+  check_invariant(cpy2);
+  EXPECT_TRUE(cpy2.count(100001) == 1);
+  s.swap(cpy2);
+  check_invariant(cpy2);
+  check_invariant(s);
+  EXPECT_TRUE(s != cpy);
+  EXPECT_TRUE(s != cpy2);
+  EXPECT_TRUE(cpy2 == cpy);
+}
+
+TEST(SortedVectorTypes, SimpleMapTest) {
+  sorted_vector_map<int, float> m;
+  for (int i = 0; i < 1000; ++i) {
+    m[i] = i / 1000.0;
+  }
+  check_invariant(m);
+
+  m[32] = 100.0;
+  check_invariant(m);
+  EXPECT_TRUE(m.count(32) == 1);
+  EXPECT_FALSE(m.find(32) == m.end());
+  m.erase(32);
+  EXPECT_TRUE(m.find(32) == m.end());
+  check_invariant(m);
+
+  sorted_vector_map<int, float> m2 = m;
+  EXPECT_TRUE(m2 == m);
+  EXPECT_FALSE(m2 != m);
+  auto it = m2.lower_bound(1 << 20);
+  EXPECT_TRUE(it == m2.end());
+  m2.insert(it, std::make_pair(1 << 20, 10.0f));
+  check_invariant(m2);
+  EXPECT_TRUE(m2.count(1 << 20) == 1);
+  EXPECT_TRUE(m < m2);
+  EXPECT_TRUE(m <= m2);
+
+  const sorted_vector_map<int, float>& cm = m;
+  auto range = cm.equal_range(42);
+  auto lbound = cm.lower_bound(42);
+  auto ubound = cm.upper_bound(42);
+  EXPECT_TRUE(range.first == lbound);
+  EXPECT_TRUE(range.second == ubound);
+  EXPECT_FALSE(range.first == cm.end());
+  EXPECT_FALSE(range.second == cm.end());
+  m.erase(m.lower_bound(42));
+  check_invariant(m);
+
+  sorted_vector_map<int, float> m3;
+  m3.insert(m2.begin(), m2.end());
+  check_invariant(m3);
+  EXPECT_TRUE(m3 == m2);
+  EXPECT_FALSE(m3 == m);
+
+  EXPECT_TRUE(m != m2);
+  EXPECT_TRUE(m2 == m3);
+  EXPECT_TRUE(m3 != m);
+  m.swap(m3);
+  check_invariant(m);
+  check_invariant(m2);
+  check_invariant(m3);
+  EXPECT_TRUE(m3 != m2);
+  EXPECT_TRUE(m3 != m);
+  EXPECT_TRUE(m == m2);
+
+  // Bad insert hint.
+  m.insert(m.begin() + 3, std::make_pair(1 << 15, 1.0f));
+  check_invariant(m);
+}
+
+TEST(SortedVectorTypes, Sizes) {
+  // The comparator, allocator, and growth policy are all empty classes,
+  // so they should add nothing to the container's footprint.
+  EXPECT_EQ(sizeof(sorted_vector_set<int>),
+            sizeof(std::vector<int>));
+  EXPECT_EQ(sizeof(sorted_vector_map<int, int>),
+            sizeof(std::vector<std::pair<int, int> >));
+
+  typedef sorted_vector_set<int, std::less<int>,
+    std::allocator<int>, OneAtATimePolicy> SetT;
+  typedef sorted_vector_map<int, int, std::less<int>,
+    std::allocator<std::pair<int, int> >, OneAtATimePolicy> MapT;
+
+  EXPECT_EQ(sizeof(SetT), sizeof(std::vector<int>));
+  EXPECT_EQ(sizeof(MapT), sizeof(std::vector<std::pair<int, int> >));
+}
+
+TEST(SortedVectorTypes, CustomCompare) {
+  sorted_vector_set<int, less_invert<int> > s;
+  for (int i = 0; i < 200; ++i)
+    s.insert(i);
+  check_invariant(s);
+
+  sorted_vector_map<int, float, less_invert<int> > m;
+  for (int i = 0; i < 200; ++i)
+    m[i] = 12.0;
+  check_invariant(m);
+}
+
+TEST(SortedVectorTypes, GrowthPolicy) {
+  typedef sorted_vector_set<CountCopyCtor, std::less<CountCopyCtor>,
+    std::allocator<CountCopyCtor>, OneAtATimePolicy> SetT;
+
+  SetT a;
+  for (int i = 0; i < 20; ++i) {
+    a.insert(CountCopyCtor(i));
+  }
+  check_invariant(a);
+  SetT::iterator it = a.begin();
+  EXPECT_FALSE(it == a.end());
+  if (it != a.end()) {
+    EXPECT_EQ(it->val_, 0);
+    // 1 copy for the initial insertion, 19 more for reallocs on the
+    // additional insertions.
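+    // (OneAtATimePolicy reserves exactly one extra slot each time, so
+    // every insertion after the first reallocates and re-copies the
+    // existing elements, including this one.)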
+    EXPECT_EQ(it->count_, 20);
+  }
+
+  std::list<CountCopyCtor> v;
+  for (int i = 0; i < 20; ++i) {
+    v.push_back(CountCopyCtor(20 + i));
+  }
+  a.insert(v.begin(), v.end());
+  check_invariant(a);
+
+  it = a.begin();
+  EXPECT_FALSE(it == a.end());
+  if (it != a.end()) {
+    EXPECT_EQ(it->val_, 0);
+    // Should be only 1 more copy for inserting the range above.
+    EXPECT_EQ(it->count_, 21);
+  }
+}
+
+TEST(SortedVectorTypes, EmptyTest) {
+  sorted_vector_set<int> emptySet;
+  EXPECT_TRUE(emptySet.lower_bound(10) == emptySet.end());
+  EXPECT_TRUE(emptySet.find(10) == emptySet.end());
+
+  sorted_vector_map<int, float> emptyMap;
+  EXPECT_TRUE(emptyMap.lower_bound(10) == emptyMap.end());
+  EXPECT_TRUE(emptyMap.find(10) == emptyMap.end());
+}
-- 
2.34.1