folly/ThreadLocal.h

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * Improved thread local storage for non-trivial types (similar speed as
  19  * pthread_getspecific but only consumes a single pthread_key_t, and 4x faster
  20  * than boost::thread_specific_ptr).
  21  *
  22  * Also includes an accessor interface to walk all the thread local child
  23  * objects of a parent.  accessAllThreads() initializes an accessor which holds
  24  * a global lock *that blocks all creation and destruction of ThreadLocal
  25  * objects with the same Tag* and can be used as an iterable container.
  26  *
  27  * Intended use is for frequent write, infrequent read data access patterns such
  28  * as counters.
  29  *
  30  * There are two classes here - ThreadLocal and ThreadLocalPtr.  ThreadLocalPtr
  31  * has semantics similar to boost::thread_specific_ptr. ThreadLocal is a thin
  32  * wrapper around ThreadLocalPtr that manages allocation automatically.
  33  *
  34  * @author Spencer Ahrens (sahrens)
  35  */
  36
  37 #pragma once
  38
  39 #include <folly/Likely.h>
  40 #include <folly/Portability.h>
  41 #include <folly/ScopeGuard.h>
  42 #include <boost/iterator/iterator_facade.hpp>
  43 #include <type_traits>
  44 #include <utility>
  45
  46 namespace folly {
  47 enum class TLPDestructionMode {
  48   THIS_THREAD,
  49   ALL_THREADS
  50 };
  51 }  // namespace
  52
  53 #include <folly/detail/ThreadLocalDetail.h>
  54
  55 namespace folly {
  56
  57 template<class T, class Tag> class ThreadLocalPtr;
  58
  59 template<class T, class Tag=void>
  60 class ThreadLocal {
  61  public:
  62   constexpr ThreadLocal() : constructor_([]() {
  63       return new T();
  64     }) {}
  65
  66   explicit ThreadLocal(std::function<T*()> constructor) :
  67       constructor_(constructor) {
  68   }
  69
  70   T* get() const {
  71     T* ptr = tlp_.get();
  72     if (LIKELY(ptr != nullptr)) {
  73       return ptr;
  74     }
  75
  76     // separated new item creation out to speed up the fast path.
  77     return makeTlp();
  78   }
  79
  80   T* operator->() const {
  81     return get();
  82   }
  83
  84   T& operator*() const {
  85     return *get();
  86   }
  87
  88   void reset(T* newPtr = nullptr) {
  89     tlp_.reset(newPtr);
  90   }
  91
  92   typedef typename ThreadLocalPtr<T,Tag>::Accessor Accessor;
  93   Accessor accessAllThreads() const {
  94     return tlp_.accessAllThreads();
  95   }
  96
  97   // movable
  98   ThreadLocal(ThreadLocal&&) = default;
  99   ThreadLocal& operator=(ThreadLocal&&) = default;
 100
 101  private:
 102   // non-copyable
 103   ThreadLocal(const ThreadLocal&) = delete;
 104   ThreadLocal& operator=(const ThreadLocal&) = delete;
 105
 106   T* makeTlp() const {
 107     auto ptr = constructor_();
 108     tlp_.reset(ptr);
 109     return ptr;
 110   }
 111
 112   mutable ThreadLocalPtr<T,Tag> tlp_;
 113   std::function<T*()> constructor_;
 114 };
 115
 116 /*
 117  * The idea here is that __thread is faster than pthread_getspecific, so we
 118  * keep a __thread array of pointers to objects (ThreadEntry::elements) where
 119  * each array has an index for each unique instance of the ThreadLocalPtr
 120  * object.  Each ThreadLocalPtr object has a unique id that is an index into
 121  * these arrays so we can fetch the correct object from thread local storage
 122  * very efficiently.
 123  *
 124  * In order to prevent unbounded growth of the id space and thus huge
  125  * ThreadEntry::elements arrays, for example due to continuous creation and
 126  * destruction of ThreadLocalPtr objects, we keep a set of all active
 127  * instances.  When an instance is destroyed we remove it from the active
 128  * set and insert the id into freeIds_ for reuse.  These operations require a
 129  * global mutex, but only happen at construction and destruction time.
 130  *
 131  * We use a single global pthread_key_t per Tag to manage object destruction and
 132  * memory cleanup upon thread exit because there is a finite number of
 133  * pthread_key_t's available per machine.
 134  *
 135  * NOTE: Apple platforms don't support the same semantics for __thread that
 136  *       Linux does (and it's only supported at all on i386). For these, use
 137  *       pthread_setspecific()/pthread_getspecific() for the per-thread
 138  *       storage.  Windows (MSVC and GCC) does support the same semantics
 139  *       with __declspec(thread)
 140  */
 141
 142 template<class T, class Tag=void>
 143 class ThreadLocalPtr {
 144  private:
 145   typedef threadlocal_detail::StaticMeta<Tag> StaticMeta;
 146  public:
 147   constexpr ThreadLocalPtr() : id_() {}
 148
 149   ThreadLocalPtr(ThreadLocalPtr&& other) noexcept :
 150     id_(std::move(other.id_)) {
 151   }
 152
 153   ThreadLocalPtr& operator=(ThreadLocalPtr&& other) {
 154     assert(this != &other);
 155     destroy();
 156     id_ = std::move(other.id_);
 157     return *this;
 158   }
 159
 160   ~ThreadLocalPtr() {
 161     destroy();
 162   }
 163
 164   T* get() const {
 165     threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
 166     return static_cast<T*>(w.ptr);
 167   }
 168
 169   T* operator->() const {
 170     return get();
 171   }
 172
 173   T& operator*() const {
 174     return *get();
 175   }
 176
 177   T* release() {
 178     threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
 179
 180     return static_cast<T*>(w.release());
 181   }
 182
 183   void reset(T* newPtr = nullptr) {
 184     auto guard = makeGuard([&] { delete newPtr; });
 185     threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
 186
 187     w.dispose(TLPDestructionMode::THIS_THREAD);
 188     guard.dismiss();
 189     w.set(newPtr);
 190   }
 191
 192   explicit operator bool() const {
 193     return get() != nullptr;
 194   }
 195
 196   /**
 197    * reset() with a custom deleter:
 198    * deleter(T* ptr, TLPDestructionMode mode)
 199    * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus
 200    * deleting pointers for all threads), and THIS_THREAD if we're only deleting
 201    * the member for one thread (because of thread exit or reset()).
 202    * Invoking the deleter must not throw.
 203    */
 204   template <class Deleter>
 205   void reset(T* newPtr, const Deleter& deleter) {
 206     auto guard = makeGuard([&] {
 207       if (newPtr) {
 208         deleter(newPtr, TLPDestructionMode::THIS_THREAD);
 209       }
 210     });
 211     threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
 212     w.dispose(TLPDestructionMode::THIS_THREAD);
 213     guard.dismiss();
 214     w.set(newPtr, deleter);
 215   }
 216
 217   // Holds a global lock for iteration through all thread local child objects.
 218   // Can be used as an iterable container.
 219   // Use accessAllThreads() to obtain one.
 220   class Accessor {
 221     friend class ThreadLocalPtr<T,Tag>;
 222
 223     threadlocal_detail::StaticMetaBase& meta_;
 224     std::mutex* lock_;
 225     uint32_t id_;
 226
 227    public:
 228     class Iterator;
 229     friend class Iterator;
 230
 231     // The iterators obtained from Accessor are bidirectional iterators.
 232     class Iterator : public boost::iterator_facade<
 233           Iterator,                               // Derived
 234           T,                                      // value_type
 235           boost::bidirectional_traversal_tag> {   // traversal
 236       friend class Accessor;
 237       friend class boost::iterator_core_access;
 238       const Accessor* accessor_;
 239       threadlocal_detail::ThreadEntry* e_;
 240
 241       void increment() {
 242         e_ = e_->next;
 243         incrementToValid();
 244       }
 245
 246       void decrement() {
 247         e_ = e_->prev;
 248         decrementToValid();
 249       }
 250
 251       T& dereference() const {
 252         return *static_cast<T*>(e_->elements[accessor_->id_].ptr);
 253       }
 254
 255       bool equal(const Iterator& other) const {
 256         return (accessor_->id_ == other.accessor_->id_ &&
 257                 e_ == other.e_);
 258       }
 259
 260       explicit Iterator(const Accessor* accessor)
 261         : accessor_(accessor),
 262           e_(&accessor_->meta_.head_) {
 263       }
 264
 265       bool valid() const {
 266         return (e_->elements &&
 267                 accessor_->id_ < e_->elementsCapacity &&
 268                 e_->elements[accessor_->id_].ptr);
 269       }
 270
 271       void incrementToValid() {
 272         for (; e_ != &accessor_->meta_.head_ && !valid(); e_ = e_->next) { }
 273       }
 274
 275       void decrementToValid() {
 276         for (; e_ != &accessor_->meta_.head_ && !valid(); e_ = e_->prev) { }
 277       }
 278     };
 279
 280     ~Accessor() {
 281       release();
 282     }
 283
 284     Iterator begin() const {
 285       return ++Iterator(this);
 286     }
 287
 288     Iterator end() const {
 289       return Iterator(this);
 290     }
 291
 292     Accessor(const Accessor&) = delete;
 293     Accessor& operator=(const Accessor&) = delete;
 294
 295     Accessor(Accessor&& other) noexcept
 296       : meta_(other.meta_),
 297         lock_(other.lock_),
 298         id_(other.id_) {
 299       other.id_ = 0;
 300       other.lock_ = nullptr;
 301     }
 302
 303     Accessor& operator=(Accessor&& other) noexcept {
 304       // Each Tag has its own unique meta, and accessors with different Tags
 305       // have different types.  So either *this is empty, or this and other
 306       // have the same tag.  But if they have the same tag, they have the same
 307       // meta (and lock), so they'd both hold the lock at the same time,
 308       // which is impossible, which leaves only one possible scenario --
 309       // *this is empty.  Assert it.
 310       assert(&meta_ == &other.meta_);
 311       assert(lock_ == nullptr);
 312       using std::swap;
 313       swap(lock_, other.lock_);
 314       swap(id_, other.id_);
 315     }
 316
 317     Accessor()
 318       : meta_(threadlocal_detail::StaticMeta<Tag>::instance()),
 319         lock_(nullptr),
 320         id_(0) {
 321     }
 322
 323    private:
 324     explicit Accessor(uint32_t id)
 325       : meta_(threadlocal_detail::StaticMeta<Tag>::instance()),
 326         lock_(&meta_.lock_) {
 327       lock_->lock();
 328       id_ = id;
 329     }
 330
 331     void release() {
 332       if (lock_) {
 333         lock_->unlock();
 334         id_ = 0;
 335         lock_ = nullptr;
 336       }
 337     }
 338   };
 339
 340   // accessor allows a client to iterate through all thread local child
 341   // elements of this ThreadLocal instance.  Holds a global lock for each <Tag>
 342   Accessor accessAllThreads() const {
 343     static_assert(!std::is_same<Tag, void>::value,
 344                   "Must use a unique Tag to use the accessAllThreads feature");
 345     return Accessor(id_.getOrAllocate(StaticMeta::instance()));
 346   }
 347
 348  private:
 349   void destroy() {
 350     StaticMeta::instance().destroy(&id_);
 351   }
 352
 353   // non-copyable
 354   ThreadLocalPtr(const ThreadLocalPtr&) = delete;
 355   ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete;
 356
 357   mutable typename StaticMeta::EntryID id_;
 358 };
 359
 360 }  // namespace folly