/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <limits.h>
#include <pthread.h>
-#include <list>
+
+#include <mutex>
#include <string>
#include <vector>
-#include <boost/thread/locks.hpp>
-#include <boost/thread/mutex.hpp>
-#include <boost/thread/locks.hpp>
-
#include <glog/logging.h>
-#include "folly/Exception.h"
-#include "folly/Foreach.h"
-#include "folly/Malloc.h"
+#include <folly/Foreach.h>
+#include <folly/Exception.h>
+#include <folly/Malloc.h>
+
+// In general, emutls cleanup is not guaranteed to play nice with the way
+// StaticMeta mixes direct pthread calls and the use of __thread. This has
+// caused problems on multiple platforms, so avoid __thread on those platforms.
+//
+// XXX: Ideally we would instead determine if emutls is in use at runtime as it
+// is possible to configure glibc on Linux to use emutls regardless.
+#if !__APPLE__ && !__ANDROID__
+#define FOLLY_TLD_USE_FOLLY_TLS 1
+#else
+#undef FOLLY_TLD_USE_FOLLY_TLS
+#endif
namespace folly {
namespace threadlocal_detail {
* This must be POD, as we memset() it to 0 and memcpy() it around.
*/
struct ElementWrapper {
- void dispose(TLPDestructionMode mode) {
- if (ptr != NULL) {
- DCHECK(deleter != NULL);
- deleter->dispose(ptr, mode);
- if (ownsDeleter) {
- delete deleter;
- }
- ptr = NULL;
- deleter = NULL;
- ownsDeleter = false;
+ bool dispose(TLPDestructionMode mode) {
+ if (ptr == nullptr) {
+ return false;
}
+
+ DCHECK(deleter != nullptr);
+ deleter->dispose(ptr, mode);
+ cleanup();
+ return true;
+ }
+
+ void* release() {
+ auto retPtr = ptr;
+
+ if (ptr != nullptr) {
+ cleanup();
+ }
+
+ return retPtr;
}
template <class Ptr>
void set(Ptr p) {
- DCHECK(ptr == NULL);
- DCHECK(deleter == NULL);
+ DCHECK(ptr == nullptr);
+ DCHECK(deleter == nullptr);
if (p) {
// We leak a single object here but that is ok. If we used an
template <class Ptr, class Deleter>
void set(Ptr p, Deleter d) {
- DCHECK(ptr == NULL);
- DCHECK(deleter == NULL);
+ DCHECK(ptr == nullptr);
+ DCHECK(deleter == nullptr);
if (p) {
ptr = p;
deleter = new CustomDeleter<Ptr,Deleter>(d);
}
}
+ void cleanup() {
+ if (ownsDeleter) {
+ delete deleter;
+ }
+ ptr = nullptr;
+ deleter = nullptr;
+ ownsDeleter = false;
+ }
+
void* ptr;
DeleterBase* deleter;
bool ownsDeleter;
return *inst_;
}
- int nextId_;
- std::vector<int> freeIds_;
- boost::mutex lock_;
+ uint32_t nextId_;
+ std::vector<uint32_t> freeIds_;
+ std::mutex lock_;
pthread_key_t pthreadKey_;
ThreadEntry head_;
t->next = t->prev = t;
}
- static __thread ThreadEntry threadEntry_;
+#ifdef FOLLY_TLD_USE_FOLLY_TLS
+ static FOLLY_TLS ThreadEntry threadEntry_;
+#endif
static StaticMeta<Tag>* inst_;
StaticMeta() : nextId_(1) {
int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
checkPosixError(ret, "pthread_key_create failed");
+#if FOLLY_HAVE_PTHREAD_ATFORK
ret = pthread_atfork(/*prepare*/ &StaticMeta::preFork,
/*parent*/ &StaticMeta::onForkParent,
/*child*/ &StaticMeta::onForkChild);
checkPosixError(ret, "pthread_atfork failed");
+#elif !__ANDROID__
+ // pthread_atfork is not part of the Android NDK at least as of n9d. If
+ // something is trying to call native fork() directly at all with Android's
+ // process management model, this is probably the least of the problems.
+ //
+ // But otherwise, this is a problem.
+ #warning pthread_atfork unavailable
+#endif
}
~StaticMeta() {
LOG(FATAL) << "StaticMeta lives forever!";
}
+ static ThreadEntry* getThreadEntry() {
+#ifdef FOLLY_TLD_USE_FOLLY_TLS
+ return &threadEntry_;
+#else
+ ThreadEntry* threadEntry =
+ static_cast<ThreadEntry*>(pthread_getspecific(inst_->pthreadKey_));
+ if (!threadEntry) {
+ threadEntry = new ThreadEntry();
+ int ret = pthread_setspecific(inst_->pthreadKey_, threadEntry);
+ checkPosixError(ret, "pthread_setspecific failed");
+ }
+ return threadEntry;
+#endif
+ }
+
static void preFork(void) {
instance().lock_.lock(); // Make sure it's created
}
}
static void onForkChild(void) {
+ // only the current thread survives
inst_->head_.next = inst_->head_.prev = &inst_->head_;
- inst_->push_back(&threadEntry_); // only the current thread survives
+ ThreadEntry* threadEntry = getThreadEntry();
+ // If this thread was in the list before the fork, add it back.
+ if (threadEntry->elementsCapacity != 0) {
+ inst_->push_back(threadEntry);
+ }
inst_->lock_.unlock();
}
static void onThreadExit(void* ptr) {
- auto & meta = instance();
+ auto& meta = instance();
+#ifdef FOLLY_TLD_USE_FOLLY_TLS
+ ThreadEntry* threadEntry = getThreadEntry();
+
DCHECK_EQ(ptr, &meta);
- // We wouldn't call pthread_setspecific unless we actually called get()
- DCHECK_NE(threadEntry_.elementsCapacity, 0);
+ DCHECK_GT(threadEntry->elementsCapacity, 0);
+#else
+ ThreadEntry* threadEntry = static_cast<ThreadEntry*>(ptr);
+#endif
{
- boost::lock_guard<boost::mutex> g(meta.lock_);
- meta.erase(&threadEntry_);
- // No need to hold the lock any longer; threadEntry_ is private to this
+ std::lock_guard<std::mutex> g(meta.lock_);
+ meta.erase(threadEntry);
+ // No need to hold the lock any longer; the ThreadEntry is private to this
// thread now that it's been removed from meta.
}
- FOR_EACH_RANGE(i, 0, threadEntry_.elementsCapacity) {
- threadEntry_.elements[i].dispose(TLPDestructionMode::THIS_THREAD);
+ // NOTE: User-provided deleter / object dtor itself may be using ThreadLocal
+ // with the same Tag, so dispose() calls below may (re)create some of the
+ // elements or even increase elementsCapacity, thus multiple cleanup rounds
+ // may be required.
+ for (bool shouldRun = true; shouldRun; ) {
+ shouldRun = false;
+ FOR_EACH_RANGE(i, 0, threadEntry->elementsCapacity) {
+ if (threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
+ shouldRun = true;
+ }
+ }
}
- free(threadEntry_.elements);
- threadEntry_.elements = NULL;
- pthread_setspecific(meta.pthreadKey_, NULL);
+ free(threadEntry->elements);
+ threadEntry->elements = nullptr;
+ pthread_setspecific(meta.pthreadKey_, nullptr);
+
+#ifndef FOLLY_TLD_USE_FOLLY_TLS
+ // Allocated in getThreadEntry() when not using folly TLS; free it
+ delete threadEntry;
+#endif
}
- static int create() {
- int id;
+ static uint32_t create() {
+ uint32_t id;
auto & meta = instance();
- boost::lock_guard<boost::mutex> g(meta.lock_);
+ std::lock_guard<std::mutex> g(meta.lock_);
if (!meta.freeIds_.empty()) {
id = meta.freeIds_.back();
meta.freeIds_.pop_back();
return id;
}
- static void destroy(int id) {
+ static void destroy(uint32_t id) {
try {
auto & meta = instance();
// Elements in other threads that use this id.
std::vector<ElementWrapper> elements;
{
- boost::lock_guard<boost::mutex> g(meta.lock_);
+ std::lock_guard<std::mutex> g(meta.lock_);
for (ThreadEntry* e = meta.head_.next; e != &meta.head_; e = e->next) {
if (id < e->elementsCapacity && e->elements[id].ptr) {
elements.push_back(e->elements[id]);
- // Writing another thread's ThreadEntry from here is fine;
- // the only other potential reader is the owning thread --
- // from onThreadExit (which grabs the lock, so is properly
- // synchronized with us) or from get() -- but using get() on a
- // ThreadLocalPtr object that's being destroyed is a bug, so
- // undefined behavior is fair game.
- e->elements[id].ptr = NULL;
- e->elements[id].deleter = NULL;
+ /*
+ * Writing another thread's ThreadEntry from here is fine;
+ * the only other potential reader is the owning thread --
+ * from onThreadExit (which grabs the lock, so is properly
+ * synchronized with us) or from get(), which also grabs
+ * the lock if it needs to resize the elements vector.
+ *
+ * We can't conflict with reads for a get(id), because
+ * it's illegal to call get on a thread local that's
+ * destructing.
+ */
+ e->elements[id].ptr = nullptr;
+ e->elements[id].deleter = nullptr;
+ e->elements[id].ownsDeleter = false;
}
}
meta.freeIds_.push_back(id);
}
/**
- * Reserve enough space in the threadEntry_.elements for the item
+ * Reserve enough space in the ThreadEntry::elements for the item
* @id to fit in.
*/
- static void reserve(int id) {
- size_t prevSize = threadEntry_.elementsCapacity;
- size_t newSize = static_cast<size_t>((id + 5) * 1.7);
+ static void reserve(uint32_t id) {
auto& meta = instance();
- ElementWrapper* ptr = nullptr;
- // Rely on jemalloc to zero the memory if possible -- maybe it knows
- // it's already zeroed and saves us some work.
- if (!usingJEMalloc() ||
- prevSize < jemallocMinInPlaceExpandable ||
- (rallocm(
- static_cast<void**>(static_cast<void*>(&threadEntry_.elements)),
- NULL, newSize * sizeof(ElementWrapper), 0,
- ALLOCM_NO_MOVE | ALLOCM_ZERO) != ALLOCM_SUCCESS)) {
- // Sigh, must realloc, but we can't call realloc here, as elements is
- // still linked in meta, so another thread might access invalid memory
- // after realloc succeeds. We'll copy by hand and update threadEntry_
- // under the lock.
+ ThreadEntry* threadEntry = getThreadEntry();
+ size_t prevCapacity = threadEntry->elementsCapacity;
+ // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent
+ // very slow start.
+ size_t newCapacity = static_cast<size_t>((id + 5) * 1.7);
+ assert(newCapacity > prevCapacity);
+ ElementWrapper* reallocated = nullptr;
+
+ // Need to grow. Note that we can't call realloc, as elements is
+ // still linked in meta, so another thread might access invalid memory
+ // after realloc succeeds. We'll copy by hand and update our ThreadEntry
+ // under the lock.
+ if (usingJEMalloc()) {
+ bool success = false;
+ size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0);
+
+ // Try to grow in place.
//
- // Note that we're using calloc instead of malloc in order to zero
- // the entire region. rallocm (ALLOCM_ZERO) will only zero newly
- // allocated memory, so if a previous allocation allocated more than
- // we requested, it's our responsibility to guarantee that the tail
- // is zeroed. calloc() is simpler than malloc() followed by memset(),
- // and potentially faster when dealing with a lot of memory, as
- // it can get already-zeroed pages from the kernel.
- if ((ptr = static_cast<ElementWrapper*>(
- calloc(newSize, sizeof(ElementWrapper)))) != nullptr) {
- memcpy(ptr, threadEntry_.elements, sizeof(ElementWrapper) * prevSize);
+ // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory,
+ // even if a previous allocation allocated more than we requested.
+ // This is fine; we always use MALLOCX_ZERO with jemalloc and we
+ // always expand our allocation to the real size.
+ if (prevCapacity * sizeof(ElementWrapper) >=
+ jemallocMinInPlaceExpandable) {
+ success = (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO)
+ == newByteSize);
+ }
+
+ // In-place growth failed.
+ if (!success) {
+ success = ((reallocated = static_cast<ElementWrapper*>(
+ mallocx(newByteSize, MALLOCX_ZERO))) != nullptr);
+ }
+
+ if (success) {
+ // Expand to real size
+ assert(newByteSize / sizeof(ElementWrapper) >= newCapacity);
+ newCapacity = newByteSize / sizeof(ElementWrapper);
} else {
throw std::bad_alloc();
}
+ } else { // no jemalloc
+ // calloc() is simpler than malloc() followed by memset(), and
+ // potentially faster when dealing with a lot of memory, as it can get
+ // already-zeroed pages from the kernel.
+ reallocated = static_cast<ElementWrapper*>(
+ calloc(newCapacity, sizeof(ElementWrapper)));
+ if (!reallocated) {
+ throw std::bad_alloc();
+ }
}
// Success, update the entry
{
- boost::lock_guard<boost::mutex> g(meta.lock_);
- if (prevSize == 0) {
- meta.push_back(&threadEntry_);
+ std::lock_guard<std::mutex> g(meta.lock_);
+
+ if (prevCapacity == 0) {
+ meta.push_back(threadEntry);
}
- if (ptr) {
+
+ if (reallocated) {
+ /*
+ * Note: we need to hold the meta lock when copying data out of
+ * the old vector, because some other thread might be
+ * destructing a ThreadLocal and writing to the elements vector
+ * of this thread.
+ */
+ memcpy(reallocated, threadEntry->elements,
+ sizeof(ElementWrapper) * prevCapacity);
using std::swap;
- swap(ptr, threadEntry_.elements);
+ swap(reallocated, threadEntry->elements);
}
- threadEntry_.elementsCapacity = newSize;
+ threadEntry->elementsCapacity = newCapacity;
}
- free(ptr);
+ free(reallocated);
- if (prevSize == 0) {
+#ifdef FOLLY_TLD_USE_FOLLY_TLS
+ if (prevCapacity == 0) {
pthread_setspecific(meta.pthreadKey_, &meta);
}
+#endif
}
- static ElementWrapper& get(int id) {
- if (UNLIKELY(threadEntry_.elementsCapacity <= id)) {
+ static ElementWrapper& get(uint32_t id) {
+ ThreadEntry* threadEntry = getThreadEntry();
+ if (UNLIKELY(threadEntry->elementsCapacity <= id)) {
reserve(id);
+ assert(threadEntry->elementsCapacity > id);
}
- return threadEntry_.elements[id];
+ return threadEntry->elements[id];
}
};
-template <class Tag> __thread ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
+#ifdef FOLLY_TLD_USE_FOLLY_TLS
+template <class Tag>
+FOLLY_TLS ThreadEntry StaticMeta<Tag>::threadEntry_{nullptr, 0,
+ nullptr, nullptr};
+#endif
template <class Tag> StaticMeta<Tag>* StaticMeta<Tag>::inst_ = nullptr;
} // namespace threadlocal_detail
} // namespace folly
#endif /* FOLLY_DETAIL_THREADLOCALDETAIL_H_ */
-