folly/ThreadCachedInt.h

   1 /*
   2  * Copyright 2012 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * Higher performance (up to 10x) atomic increment using thread caching.
  19  *
  20  * @author Spencer Ahrens (sahrens)
  21  */
  22
  23 #ifndef FOLLY_THREADCACHEDINT_H
  24 #define FOLLY_THREADCACHEDINT_H
  25
  26 #include <atomic>
  27 #include "folly/Likely.h"
  28 #include "folly/ThreadLocal.h"
  29
  30 namespace folly {
  31
  32
  33 // Note that readFull requires holding a lock and iterating through all of the
  34 // thread local objects with the same Tag, so if you have a lot of
  35 // ThreadCachedInt's you should considering breaking up the Tag space even
  36 // further.
  37 template <class IntT, class Tag=IntT>
  38 class ThreadCachedInt : boost::noncopyable {
  39   struct IntCache;
  40
  41  public:
  42   explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
  43     : target_(initialVal), cacheSize_(cacheSize) {
  44   }
  45
  46   void increment(IntT inc) {
  47     auto cache = cache_.get();
  48     if (UNLIKELY(cache == NULL || cache->parent_ == NULL)) {
  49       cache = new IntCache(*this);
  50       cache_.reset(cache);
  51     }
  52     cache->increment(inc);
  53   }
  54
  55   // Quickly grabs the current value which may not include some cached
  56   // increments.
  57   IntT readFast() const {
  58     return target_.load(std::memory_order_relaxed);
  59   }
  60
  61   // Reads the current value plus all the cached increments.  Requires grabbing
  62   // a lock, so this is significantly slower than readFast().
  63   IntT readFull() const {
  64     IntT ret = readFast();
  65     for (const auto& cache : cache_.accessAllThreads()) {
  66       if (!cache.reset_.load(std::memory_order_acquire)) {
  67         ret += cache.val_.load(std::memory_order_relaxed);
  68       }
  69     }
  70     return ret;
  71   }
  72
  73   // Quickly reads and resets current value (doesn't reset cached increments).
  74   IntT readFastAndReset() {
  75     return target_.exchange(0, std::memory_order_release);
  76   }
  77
  78   // This function is designed for accumulating into another counter, where you
  79   // only want to count each increment once.  It can still get the count a
  80   // little off, however, but it should be much better than calling readFull()
  81   // and set(0) sequentially.
  82   IntT readFullAndReset() {
  83     IntT ret = readFastAndReset();
  84     for (auto& cache : cache_.accessAllThreads()) {
  85       if (!cache.reset_.load(std::memory_order_acquire)) {
  86         ret += cache.val_.load(std::memory_order_relaxed);
  87         cache.reset_.store(true, std::memory_order_release);
  88       }
  89     }
  90     return ret;
  91   }
  92
  93   void setCacheSize(uint32_t newSize) {
  94     cacheSize_.store(newSize, std::memory_order_release);
  95   }
  96
  97   uint32_t getCacheSize() const {
  98     return cacheSize_.load();
  99   }
 100
 101   ThreadCachedInt& operator+=(IntT inc) { increment(inc); return *this; }
 102   ThreadCachedInt& operator-=(IntT inc) { increment(-inc); return *this; }
 103   // pre-increment (we don't support post-increment)
 104   ThreadCachedInt& operator++() { increment(1); return *this; }
 105   ThreadCachedInt& operator--() { increment(-1); return *this; }
 106
 107   // Thread-safe set function.
 108   // This is a best effort implementation. In some edge cases, there could be
 109   // data loss (missing counts)
 110   void set(IntT newVal) {
 111     for (auto& cache : cache_.accessAllThreads()) {
 112       cache.reset_.store(true, std::memory_order_release);
 113     }
 114     target_.store(newVal, std::memory_order_release);
 115   }
 116
 117   // This is a little tricky - it's possible that our IntCaches are still alive
 118   // in another thread and will get destroyed after this destructor runs, so we
 119   // need to make sure we signal that this parent is dead.
 120   ~ThreadCachedInt() {
 121     for (auto& cache : cache_.accessAllThreads()) {
 122       cache.parent_ = NULL;
 123     }
 124   }
 125
 126  private:
 127   std::atomic<IntT> target_;
 128   std::atomic<uint32_t> cacheSize_;
 129   ThreadLocalPtr<IntCache,Tag> cache_; // Must be last for dtor ordering
 130
 131   // This should only ever be modified by one thread
 132   struct IntCache {
 133     ThreadCachedInt* parent_;
 134     mutable std::atomic<IntT> val_;
 135     mutable uint32_t numUpdates_;
 136     std::atomic<bool> reset_;
 137
 138     explicit IntCache(ThreadCachedInt& parent)
 139         : parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}
 140
 141     void increment(IntT inc) {
 142       if (LIKELY(!reset_.load(std::memory_order_acquire))) {
 143         // This thread is the only writer to val_, so it's fine do do
 144         // a relaxed load and do the addition non-atomically.
 145         val_.store(
 146           val_.load(std::memory_order_relaxed) + inc,
 147           std::memory_order_release
 148         );
 149       } else {
 150         val_.store(inc, std::memory_order_relaxed);
 151         reset_.store(false, std::memory_order_release);
 152       }
 153       ++numUpdates_;
 154       if (UNLIKELY(numUpdates_ >
 155                    parent_->cacheSize_.load(std::memory_order_acquire))) {
 156         flush();
 157       }
 158     }
 159
 160     void flush() const {
 161       parent_->target_.fetch_add(val_, std::memory_order_release);
 162       val_.store(0, std::memory_order_release);
 163       numUpdates_ = 0;
 164     }
 165
 166     ~IntCache() {
 167       if (parent_) {
 168         flush();
 169       }
 170     }
 171   };
 172 };
 173
 174 }
 175
 176 #endif