/*
 * Copyright 2014 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Higher performance (up to 10x) atomic increment using thread caching.
 *
 * @author Spencer Ahrens (sahrens)
 */
#ifndef FOLLY_THREADCACHEDINT_H
#define FOLLY_THREADCACHEDINT_H

#include <atomic>
#include <cstdint>

#include <boost/noncopyable.hpp>

#include "folly/Likely.h"
#include "folly/ThreadLocal.h"
// Note that readFull requires holding a lock and iterating through all of the
// thread local objects with the same Tag, so if you have a lot of
// ThreadCachedInt's you should consider breaking up the Tag space even
// further.
40 template <class IntT, class Tag=IntT>
41 class ThreadCachedInt : boost::noncopyable {
45 explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
46 : target_(initialVal), cacheSize_(cacheSize) {
49 void increment(IntT inc) {
50 auto cache = cache_.get();
51 if (UNLIKELY(cache == nullptr || cache->parent_ == nullptr)) {
52 cache = new IntCache(*this);
55 cache->increment(inc);
58 // Quickly grabs the current value which may not include some cached
60 IntT readFast() const {
61 return target_.load(std::memory_order_relaxed);
64 // Reads the current value plus all the cached increments. Requires grabbing
65 // a lock, so this is significantly slower than readFast().
66 IntT readFull() const {
67 IntT ret = readFast();
68 for (const auto& cache : cache_.accessAllThreads()) {
69 if (!cache.reset_.load(std::memory_order_acquire)) {
70 ret += cache.val_.load(std::memory_order_relaxed);
76 // Quickly reads and resets current value (doesn't reset cached increments).
77 IntT readFastAndReset() {
78 return target_.exchange(0, std::memory_order_release);
81 // This function is designed for accumulating into another counter, where you
82 // only want to count each increment once. It can still get the count a
83 // little off, however, but it should be much better than calling readFull()
84 // and set(0) sequentially.
85 IntT readFullAndReset() {
86 IntT ret = readFastAndReset();
87 for (auto& cache : cache_.accessAllThreads()) {
88 if (!cache.reset_.load(std::memory_order_acquire)) {
89 ret += cache.val_.load(std::memory_order_relaxed);
90 cache.reset_.store(true, std::memory_order_release);
96 void setCacheSize(uint32_t newSize) {
97 cacheSize_.store(newSize, std::memory_order_release);
100 uint32_t getCacheSize() const {
101 return cacheSize_.load();
104 ThreadCachedInt& operator+=(IntT inc) { increment(inc); return *this; }
105 ThreadCachedInt& operator-=(IntT inc) { increment(-inc); return *this; }
106 // pre-increment (we don't support post-increment)
107 ThreadCachedInt& operator++() { increment(1); return *this; }
108 ThreadCachedInt& operator--() { increment(-1); return *this; }
110 // Thread-safe set function.
111 // This is a best effort implementation. In some edge cases, there could be
112 // data loss (missing counts)
113 void set(IntT newVal) {
114 for (auto& cache : cache_.accessAllThreads()) {
115 cache.reset_.store(true, std::memory_order_release);
117 target_.store(newVal, std::memory_order_release);
120 // This is a little tricky - it's possible that our IntCaches are still alive
121 // in another thread and will get destroyed after this destructor runs, so we
122 // need to make sure we signal that this parent is dead.
124 for (auto& cache : cache_.accessAllThreads()) {
125 cache.parent_ = nullptr;
130 std::atomic<IntT> target_;
131 std::atomic<uint32_t> cacheSize_;
132 ThreadLocalPtr<IntCache,Tag> cache_; // Must be last for dtor ordering
134 // This should only ever be modified by one thread
136 ThreadCachedInt* parent_;
137 mutable std::atomic<IntT> val_;
138 mutable uint32_t numUpdates_;
139 std::atomic<bool> reset_;
141 explicit IntCache(ThreadCachedInt& parent)
142 : parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}
144 void increment(IntT inc) {
145 if (LIKELY(!reset_.load(std::memory_order_acquire))) {
146 // This thread is the only writer to val_, so it's fine do do
147 // a relaxed load and do the addition non-atomically.
149 val_.load(std::memory_order_relaxed) + inc,
150 std::memory_order_release
153 val_.store(inc, std::memory_order_relaxed);
154 reset_.store(false, std::memory_order_release);
157 if (UNLIKELY(numUpdates_ >
158 parent_->cacheSize_.load(std::memory_order_acquire))) {
164 parent_->target_.fetch_add(val_, std::memory_order_release);
165 val_.store(0, std::memory_order_release);