abstract thread_local support
authorElizabeth Smith <elizabeths@fb.com>
Thu, 17 Apr 2014 14:49:10 +0000 (07:49 -0700)
committerSara Golemon <sgolemon@fb.com>
Fri, 18 Apr 2014 19:04:15 +0000 (12:04 -0700)
Summary:
Change from using __thread to using the FOLLY_TLS macro. This allows abstraction over the gcc and msvc implementations of thread-local storage (__thread and __declspec(thread)), which have the same semantics, and will also allow drop-in replacement with thread_local once compiler support for that feature is complete. This doesn't do anything about Apple, however, which still has broken __thread support.

This doesn't actually change any implementation for now; it simply allows for correct compilation.

Test Plan: fbmake runtests

Reviewed By: delong.j@fb.com

FB internal diff: D1278726

12 files changed:
folly/Portability.h
folly/ThreadLocal.h
folly/detail/CacheLocality.cpp
folly/detail/CacheLocality.h
folly/detail/MemoryIdler.cpp
folly/detail/ThreadLocalDetail.h
folly/experimental/exception_tracer/ExceptionTracerLib.cpp
folly/test/CacheLocalityTest.cpp
folly/test/DeterministicSchedule.cpp
folly/test/DeterministicSchedule.h
folly/test/MPMCQueueTest.cpp
folly/test/ThreadCachedIntTest.cpp

index 0a8989af3e4c3f002c4461b3ac20b27597a5f9fd..d3edde9e72e3c87a4978798940ba8a8fe3655878 100644 (file)
@@ -95,6 +95,18 @@ struct MaxAlign { char c; } __attribute__((aligned));
 # endif
 #endif
 
+/* Platform specific TLS support
+ * gcc implements __thread
+ * msvc implements __declspec(thread)
+ * the semantics are the same (but remember __thread is broken on apple)
+ */
+#if defined(_MSC_VER)
+# define FOLLY_TLS __declspec(thread)
+#elif defined(__GNUC__) || defined(__clang__)
+# define FOLLY_TLS __thread
+#else
+# error cannot define platform specific thread local storage
+#endif
 
 // Define to 1 if you have the `preadv' and `pwritev' functions, respectively
 #if !defined(FOLLY_HAVE_PREADV) && !defined(FOLLY_HAVE_PWRITEV)
index 4ecc757ad440dc983c6842a010e905395d31bdcd..bae58411713318a01a404206eb1478c40b550807 100644 (file)
@@ -128,7 +128,8 @@ class ThreadLocal {
  * NOTE: Apple platforms don't support the same semantics for __thread that
  *       Linux does (and it's only supported at all on i386). For these, use
  *       pthread_setspecific()/pthread_getspecific() for the per-thread
- *       storage.
+ *       storage.  Windows (MSVC and GCC) does support the same semantics
+ *       with __declspec(thread)
  */
 
 template<class T, class Tag=void>
index e3364dd57a54f962c29df3ecc3428f71f9e5ca01..a5b9393963bf6a91d5a04c1ed0d052ca3995cfd0 100644 (file)
@@ -230,7 +230,7 @@ template<>
 std::atomic<size_t> SequentialThreadId<std::atomic>::prevId(0);
 
 template<>
-__thread size_t SequentialThreadId<std::atomic>::currentId(0);
+FOLLY_TLS size_t SequentialThreadId<std::atomic>::currentId(0);
 
 /////////////// AccessSpreader
 
index 1a0a65f6e8247ed2a8c91204f6d65947beed3028..66a578897835ef228ad24770938c7ac6cc9c4ba6 100644 (file)
@@ -26,6 +26,7 @@
 #include <type_traits>
 #include <vector>
 #include "folly/Likely.h"
+#include "folly/Portability.h"
 
 namespace folly { namespace detail {
 
@@ -172,8 +173,7 @@ struct SequentialThreadId {
  private:
   static Atom<size_t> prevId;
 
-  // TODO: switch to thread_local
-  static __thread size_t currentId;
+  static FOLLY_TLS size_t currentId;
 };
 
 template <template<typename> class Atom, size_t kMaxCpus>
index 2d026e4806480c00f17462adb74a5c1f67ac39d3..979d9329e278f7d19998b2255fc82ae32842e951 100644 (file)
@@ -90,8 +90,8 @@ void MemoryIdler::flushLocalMallocCaches() {
 #ifdef __x86_64__
 
 static const size_t s_pageSize = sysconf(_SC_PAGESIZE);
-static __thread uintptr_t tls_stackLimit;
-static __thread size_t tls_stackSize;
+static FOLLY_TLS uintptr_t tls_stackLimit;
+static FOLLY_TLS size_t tls_stackSize;
 
 static void fetchStackLimits() {
   pthread_attr_t attr;
index 52e6212a47eb8e0cedc5cbed1532ec2fc161b342..423b0f2ddcf386c469de200ce05a448bde53f1fc 100644 (file)
@@ -169,7 +169,7 @@ struct StaticMeta {
   }
 
 #if !__APPLE__
-  static __thread ThreadEntry threadEntry_;
+  static FOLLY_TLS ThreadEntry threadEntry_;
 #endif
   static StaticMeta<Tag>* inst_;
 
@@ -412,7 +412,8 @@ struct StaticMeta {
 };
 
 #if !__APPLE__
-template <class Tag> __thread ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
+template <class Tag>
+FOLLY_TLS ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
 #endif
 template <class Tag> StaticMeta<Tag>* StaticMeta<Tag>::inst_ = nullptr;
 
index 8105a1563264fd15cf64bd68517ef70eae31d9de..e13774af7ea40f7e7e5bc6d322f454df7a041a8c 100644 (file)
@@ -42,9 +42,9 @@ using namespace folly::exception_tracer;
 
 namespace {
 
-__thread bool invalid;
-__thread StackTraceStack activeExceptions;
-__thread StackTraceStack caughtExceptions;
+FOLLY_TLS bool invalid;
+FOLLY_TLS StackTraceStack activeExceptions;
+FOLLY_TLS StackTraceStack caughtExceptions;
 pthread_once_t initialized = PTHREAD_ONCE_INIT;
 
 extern "C" {
index 3a739a001e7629d93f50aff1ac4f969ec4414d43..338c9c4751533e4c622655c7a65cb1a35c84f589 100644 (file)
@@ -327,7 +327,7 @@ TEST(SequentialThreadId, Simple) {
   EXPECT_EQ(cpu, again);
 }
 
-static __thread unsigned testingCpu = 0;
+static FOLLY_TLS unsigned testingCpu = 0;
 
 static int testingGetcpu(unsigned* cpu, unsigned* node, void* unused) {
   if (cpu != nullptr) {
index 3136cf3016698689acaaf3487a3dbc19801b4440..a19c977efc25126eafc51997f0f25cbc94e4f3d6 100644 (file)
@@ -25,8 +25,8 @@
 
 namespace folly { namespace test {
 
-__thread sem_t* DeterministicSchedule::tls_sem;
-__thread DeterministicSchedule* DeterministicSchedule::tls_sched;
+FOLLY_TLS sem_t* DeterministicSchedule::tls_sem;
+FOLLY_TLS DeterministicSchedule* DeterministicSchedule::tls_sched;
 
 // access is protected by futexLock
 static std::unordered_map<detail::Futex<DeterministicAtomic>*,
@@ -335,7 +335,8 @@ test::DeterministicAtomic<size_t>
     SequentialThreadId<test::DeterministicAtomic>::prevId(0);
 
 template<>
-__thread size_t SequentialThreadId<test::DeterministicAtomic>::currentId(0);
+FOLLY_TLS size_t
+    SequentialThreadId<test::DeterministicAtomic>::currentId(0);
 
 template<>
 const AccessSpreader<test::DeterministicAtomic>
index b73d7c5a02d9a05195cef9945b4be54cce42d00c..a344ac9ee65629c4cf831437143ae2d8bc867311 100644 (file)
@@ -129,8 +129,8 @@ class DeterministicSchedule : boost::noncopyable {
   static int getRandNumber(int n);
 
  private:
-  static __thread sem_t* tls_sem;
-  static __thread DeterministicSchedule* tls_sched;
+  static FOLLY_TLS sem_t* tls_sem;
+  static FOLLY_TLS DeterministicSchedule* tls_sched;
 
   std::function<int(int)> scheduler_;
   std::vector<sem_t*> sems_;
index 5bb2fa7b966ebf40baa02308640f0f92949fed9d..81dce2633d8d8915e2ae5170cabc4688dfff0cd8 100644 (file)
@@ -418,8 +418,8 @@ enum LifecycleEvent {
   MAX_LIFECYCLE_EVENT
 };
 
-static __thread int lc_counts[MAX_LIFECYCLE_EVENT];
-static __thread int lc_prev[MAX_LIFECYCLE_EVENT];
+static FOLLY_TLS int lc_counts[MAX_LIFECYCLE_EVENT];
+static FOLLY_TLS int lc_prev[MAX_LIFECYCLE_EVENT];
 
 static int lc_outstanding() {
   return lc_counts[DEFAULT_CONSTRUCTOR] + lc_counts[COPY_CONSTRUCTOR] +
index 15c0f96b8b8ec3652db43b527c03f89149d1d0c4..a84a82fd0052d8fc6f179267234f1fd38e04ad33 100644 (file)
@@ -152,8 +152,8 @@ ThreadLocal<int64_t> globalTL64Baseline;
 ThreadLocal<int32_t> globalTL32Baseline;
 std::atomic<int64_t> globalInt64Baseline(0);
 std::atomic<int32_t> globalInt32Baseline(0);
-__thread int64_t global__thread64;
-__thread int32_t global__thread32;
+FOLLY_TLS int64_t global__thread64;
+FOLLY_TLS int32_t global__thread32;
 
 // Alternate lock-free implementation.  Achieves about the same performance,
 // but uses about 20x more memory than ThreadCachedInt with 24 threads.