abstract thread_local support

author Elizabeth Smith <elizabeths@fb.com>

Thu, 17 Apr 2014 14:49:10 +0000 (07:49 -0700)

committer Sara Golemon <sgolemon@fb.com>

Fri, 18 Apr 2014 19:04:15 +0000 (12:04 -0700)
author Elizabeth Smith <elizabeths@fb.com>
Thu, 17 Apr 2014 14:49:10 +0000 (07:49 -0700)
committer Sara Golemon <sgolemon@fb.com>
Fri, 18 Apr 2014 19:04:15 +0000 (12:04 -0700)
diff --git a/folly/Portability.h b/folly/Portability.h

index 0a8989af3e4c3f002c4461b3ac20b27597a5f9fd..d3edde9e72e3c87a4978798940ba8a8fe3655878 100644 (file)
--- a/folly/Portability.h
+++ b/folly/Portability.h
@@ -95,6 +95,18 @@ struct MaxAlign { char c; } __attribute__((aligned));
  # endif
  #endif
  
+/* Platform-specific thread-local storage (TLS) support.
+ * GCC and Clang implement __thread;
+ * MSVC implements __declspec(thread).
+ * The semantics are the same (but remember __thread is broken on Apple).
+ */
+#if defined(_MSC_VER)
+# define FOLLY_TLS __declspec(thread)
+#elif defined(__GNUC__) || defined(__clang__)
+# define FOLLY_TLS __thread
+#else
+# error cannot define platform specific thread local storage
+#endif
  
  // Define to 1 if you have the `preadv' and `pwritev' functions, respectively
  #if !defined(FOLLY_HAVE_PREADV) && !defined(FOLLY_HAVE_PWRITEV)
diff --git a/folly/ThreadLocal.h b/folly/ThreadLocal.h

index 4ecc757ad440dc983c6842a010e905395d31bdcd..bae58411713318a01a404206eb1478c40b550807 100644 (file)
--- a/folly/ThreadLocal.h
+++ b/folly/ThreadLocal.h
@@ -128,7 +128,8 @@ class ThreadLocal {
   * NOTE: Apple platforms don't support the same semantics for __thread that
   *       Linux does (and it's only supported at all on i386). For these, use
   *       pthread_setspecific()/pthread_getspecific() for the per-thread
- *       storage.
+ *       storage.  Windows compilers (MSVC and GCC) do support the same
+ *       semantics with __declspec(thread).
   */
  
  template<class T, class Tag=void>
diff --git a/folly/detail/CacheLocality.cpp b/folly/detail/CacheLocality.cpp

index e3364dd57a54f962c29df3ecc3428f71f9e5ca01..a5b9393963bf6a91d5a04c1ed0d052ca3995cfd0 100644 (file)
--- a/folly/detail/CacheLocality.cpp
+++ b/folly/detail/CacheLocality.cpp
@@ -230,7 +230,7 @@ template<>
  std::atomic<size_t> SequentialThreadId<std::atomic>::prevId(0);
  
  template<>
-__thread size_t SequentialThreadId<std::atomic>::currentId(0);
+FOLLY_TLS size_t SequentialThreadId<std::atomic>::currentId(0);
  
  /////////////// AccessSpreader
  
diff --git a/folly/detail/CacheLocality.h b/folly/detail/CacheLocality.h

index 1a0a65f6e8247ed2a8c91204f6d65947beed3028..66a578897835ef228ad24770938c7ac6cc9c4ba6 100644 (file)
--- a/folly/detail/CacheLocality.h
+++ b/folly/detail/CacheLocality.h
@@ -26,6 +26,7 @@
  #include <type_traits>
  #include <vector>
  #include "folly/Likely.h"
+#include "folly/Portability.h"
  
  namespace folly { namespace detail {
  
@@ -172,8 +173,7 @@ struct SequentialThreadId {
   private:
    static Atom<size_t> prevId;
  
-  // TODO: switch to thread_local
-  static __thread size_t currentId;
+  static FOLLY_TLS size_t currentId;
  };
  
  template <template<typename> class Atom, size_t kMaxCpus>
diff --git a/folly/detail/MemoryIdler.cpp b/folly/detail/MemoryIdler.cpp

index 2d026e4806480c00f17462adb74a5c1f67ac39d3..979d9329e278f7d19998b2255fc82ae32842e951 100644 (file)
--- a/folly/detail/MemoryIdler.cpp
+++ b/folly/detail/MemoryIdler.cpp
@@ -90,8 +90,8 @@ void MemoryIdler::flushLocalMallocCaches() {
  #ifdef __x86_64__
  
  static const size_t s_pageSize = sysconf(_SC_PAGESIZE);
-static __thread uintptr_t tls_stackLimit;
-static __thread size_t tls_stackSize;
+static FOLLY_TLS uintptr_t tls_stackLimit;
+static FOLLY_TLS size_t tls_stackSize;
  
  static void fetchStackLimits() {
    pthread_attr_t attr;
diff --git a/folly/detail/ThreadLocalDetail.h b/folly/detail/ThreadLocalDetail.h

index 52e6212a47eb8e0cedc5cbed1532ec2fc161b342..423b0f2ddcf386c469de200ce05a448bde53f1fc 100644 (file)
--- a/folly/detail/ThreadLocalDetail.h
+++ b/folly/detail/ThreadLocalDetail.h
@@ -169,7 +169,7 @@ struct StaticMeta {
    }
  
  #if !__APPLE__
-  static __thread ThreadEntry threadEntry_;
+  static FOLLY_TLS ThreadEntry threadEntry_;
  #endif
    static StaticMeta<Tag>* inst_;
  
@@ -412,7 +412,8 @@ struct StaticMeta {
  };
  
  #if !__APPLE__
-template <class Tag> __thread ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
+template <class Tag>
+FOLLY_TLS ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
  #endif
  template <class Tag> StaticMeta<Tag>* StaticMeta<Tag>::inst_ = nullptr;
  
diff --git a/folly/experimental/exception_tracer/ExceptionTracerLib.cpp b/folly/experimental/exception_tracer/ExceptionTracerLib.cpp

index 8105a1563264fd15cf64bd68517ef70eae31d9de..e13774af7ea40f7e7e5bc6d322f454df7a041a8c 100644 (file)
--- a/folly/experimental/exception_tracer/ExceptionTracerLib.cpp
+++ b/folly/experimental/exception_tracer/ExceptionTracerLib.cpp
@@ -42,9 +42,9 @@ using namespace folly::exception_tracer;
  
  namespace {
  
-__thread bool invalid;
-__thread StackTraceStack activeExceptions;
-__thread StackTraceStack caughtExceptions;
+FOLLY_TLS bool invalid;
+FOLLY_TLS StackTraceStack activeExceptions;
+FOLLY_TLS StackTraceStack caughtExceptions;
  pthread_once_t initialized = PTHREAD_ONCE_INIT;
  
  extern "C" {
diff --git a/folly/test/CacheLocalityTest.cpp b/folly/test/CacheLocalityTest.cpp

index 3a739a001e7629d93f50aff1ac4f969ec4414d43..338c9c4751533e4c622655c7a65cb1a35c84f589 100644 (file)
--- a/folly/test/CacheLocalityTest.cpp
+++ b/folly/test/CacheLocalityTest.cpp
@@ -327,7 +327,7 @@ TEST(SequentialThreadId, Simple) {
    EXPECT_EQ(cpu, again);
  }
  
-static __thread unsigned testingCpu = 0;
+static FOLLY_TLS unsigned testingCpu = 0;
  
  static int testingGetcpu(unsigned* cpu, unsigned* node, void* unused) {
    if (cpu != nullptr) {
diff --git a/folly/test/DeterministicSchedule.cpp b/folly/test/DeterministicSchedule.cpp

index 3136cf3016698689acaaf3487a3dbc19801b4440..a19c977efc25126eafc51997f0f25cbc94e4f3d6 100644 (file)
--- a/folly/test/DeterministicSchedule.cpp
+++ b/folly/test/DeterministicSchedule.cpp
@@ -25,8 +25,8 @@
  
  namespace folly { namespace test {
  
-__thread sem_t* DeterministicSchedule::tls_sem;
-__thread DeterministicSchedule* DeterministicSchedule::tls_sched;
+FOLLY_TLS sem_t* DeterministicSchedule::tls_sem;
+FOLLY_TLS DeterministicSchedule* DeterministicSchedule::tls_sched;
  
  // access is protected by futexLock
  static std::unordered_map<detail::Futex<DeterministicAtomic>*,
@@ -335,7 +335,8 @@ test::DeterministicAtomic<size_t>
      SequentialThreadId<test::DeterministicAtomic>::prevId(0);
  
  template<>
-__thread size_t SequentialThreadId<test::DeterministicAtomic>::currentId(0);
+FOLLY_TLS size_t
+    SequentialThreadId<test::DeterministicAtomic>::currentId(0);
  
  template<>
  const AccessSpreader<test::DeterministicAtomic>
diff --git a/folly/test/DeterministicSchedule.h b/folly/test/DeterministicSchedule.h

index b73d7c5a02d9a05195cef9945b4be54cce42d00c..a344ac9ee65629c4cf831437143ae2d8bc867311 100644 (file)
--- a/folly/test/DeterministicSchedule.h
+++ b/folly/test/DeterministicSchedule.h
@@ -129,8 +129,8 @@ class DeterministicSchedule : boost::noncopyable {
    static int getRandNumber(int n);
  
   private:
-  static __thread sem_t* tls_sem;
-  static __thread DeterministicSchedule* tls_sched;
+  static FOLLY_TLS sem_t* tls_sem;
+  static FOLLY_TLS DeterministicSchedule* tls_sched;
  
    std::function<int(int)> scheduler_;
    std::vector<sem_t*> sems_;
diff --git a/folly/test/MPMCQueueTest.cpp b/folly/test/MPMCQueueTest.cpp

index 5bb2fa7b966ebf40baa02308640f0f92949fed9d..81dce2633d8d8915e2ae5170cabc4688dfff0cd8 100644 (file)
--- a/folly/test/MPMCQueueTest.cpp
+++ b/folly/test/MPMCQueueTest.cpp
@@ -418,8 +418,8 @@ enum LifecycleEvent {
    MAX_LIFECYCLE_EVENT
  };
  
-static __thread int lc_counts[MAX_LIFECYCLE_EVENT];
-static __thread int lc_prev[MAX_LIFECYCLE_EVENT];
+static FOLLY_TLS int lc_counts[MAX_LIFECYCLE_EVENT];
+static FOLLY_TLS int lc_prev[MAX_LIFECYCLE_EVENT];
  
  static int lc_outstanding() {
    return lc_counts[DEFAULT_CONSTRUCTOR] + lc_counts[COPY_CONSTRUCTOR] +
diff --git a/folly/test/ThreadCachedIntTest.cpp b/folly/test/ThreadCachedIntTest.cpp

index 15c0f96b8b8ec3652db43b527c03f89149d1d0c4..a84a82fd0052d8fc6f179267234f1fd38e04ad33 100644 (file)
--- a/folly/test/ThreadCachedIntTest.cpp
+++ b/folly/test/ThreadCachedIntTest.cpp
@@ -152,8 +152,8 @@ ThreadLocal<int64_t> globalTL64Baseline;
  ThreadLocal<int32_t> globalTL32Baseline;
  std::atomic<int64_t> globalInt64Baseline(0);
  std::atomic<int32_t> globalInt32Baseline(0);
-__thread int64_t global__thread64;
-__thread int32_t global__thread32;
+FOLLY_TLS int64_t global__thread64;
+FOLLY_TLS int32_t global__thread32;
  
  // Alternate lock-free implementation.  Achieves about the same performance,
  // but uses about 20x more memory than ThreadCachedInt with 24 threads.
author	Elizabeth Smith <elizabeths@fb.com>
	Thu, 17 Apr 2014 14:49:10 +0000 (07:49 -0700)
committer	Sara Golemon <sgolemon@fb.com>
	Fri, 18 Apr 2014 19:04:15 +0000 (12:04 -0700)
folly/Portability.h		patch \| blob \| history
folly/ThreadLocal.h		patch \| blob \| history
folly/detail/CacheLocality.cpp		patch \| blob \| history
folly/detail/CacheLocality.h		patch \| blob \| history
folly/detail/MemoryIdler.cpp		patch \| blob \| history
folly/detail/ThreadLocalDetail.h		patch \| blob \| history
folly/experimental/exception_tracer/ExceptionTracerLib.cpp		patch \| blob \| history
folly/test/CacheLocalityTest.cpp		patch \| blob \| history
folly/test/DeterministicSchedule.cpp		patch \| blob \| history
folly/test/DeterministicSchedule.h		patch \| blob \| history
folly/test/MPMCQueueTest.cpp		patch \| blob \| history
folly/test/ThreadCachedIntTest.cpp		patch \| blob \| history