2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/detail/Futex.h>
20 #include <condition_variable>
22 #include <boost/intrusive/list.hpp>
23 #include <folly/Hash.h>
24 #include <folly/ScopeGuard.h>
28 # include <linux/futex.h>
29 # include <sys/syscall.h>
32 using namespace std::chrono;
34 namespace folly { namespace detail {
38 ////////////////////////////////////////////////////
39 // native implementation using the futex() syscall
43 int nativeFutexWake(void* addr, int count, uint32_t wakeMask) {
44 int rv = syscall(SYS_futex,
46 FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG, /* op */
48 nullptr, /* timeout */
57 template <class Clock>
59 timeSpecFromTimePoint(time_point<Clock> absTime)
61 auto duration = absTime.time_since_epoch();
62 auto secs = duration_cast<seconds>(duration);
63 auto nanos = duration_cast<nanoseconds>(duration - secs);
64 struct timespec result = { secs.count(), nanos.count() };
68 FutexResult nativeFutexWaitImpl(void* addr,
70 time_point<system_clock>* absSystemTime,
71 time_point<steady_clock>* absSteadyTime,
73 assert(absSystemTime == nullptr || absSteadyTime == nullptr);
75 int op = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG;
77 struct timespec* timeout = nullptr;
79 if (absSystemTime != nullptr) {
80 op |= FUTEX_CLOCK_REALTIME;
81 ts = timeSpecFromTimePoint(*absSystemTime);
83 } else if (absSteadyTime != nullptr) {
84 ts = timeSpecFromTimePoint(*absSteadyTime);
88 // Unlike FUTEX_WAIT, FUTEX_WAIT_BITSET requires an absolute timeout
89 // value - http://locklessinc.com/articles/futex_cheat_sheet/
90 int rv = syscall(SYS_futex,
94 timeout, /* timeout */
99 return FutexResult::AWOKEN;
103 assert(timeout != nullptr);
104 return FutexResult::TIMEDOUT;
106 return FutexResult::INTERRUPTED;
108 return FutexResult::VALUE_CHANGED;
111 // EACCESS, EFAULT, or EINVAL. All of these mean *addr point to
112 // invalid memory (or I misunderstand the API). We can either
113 // crash, or return a value that lets the process continue for
114 // a bit. We choose the latter. VALUE_CHANGED probably turns the
115 // caller into a spin lock.
116 return FutexResult::VALUE_CHANGED;
123 ///////////////////////////////////////////////////////
124 // compatibility implementation using standard C++ API
126 // Our emulated futex uses 4096 lists of wait nodes. There are two levels
127 // of locking: a per-list mutex that controls access to the list and a
128 // per-node mutex, condvar, and bool that are used for the actual wakeups.
129 // The per-node mutex allows us to do precise wakeups without thundering
132 struct EmulatedFutexWaitNode : public boost::intrusive::list_base_hook<> {
134 const uint32_t waitMask_;
136 // tricky: hold both bucket and node mutex to write, either to read
139 std::condition_variable cond_;
141 EmulatedFutexWaitNode(void* addr, uint32_t waitMask)
143 , waitMask_(waitMask)
149 struct EmulatedFutexBucket {
151 boost::intrusive::list<EmulatedFutexWaitNode> waiters_;
153 static const size_t kNumBuckets = 4096;
154 static EmulatedFutexBucket* gBuckets;
155 static std::once_flag gBucketInit;
157 static EmulatedFutexBucket& bucketFor(void* addr) {
158 std::call_once(gBucketInit, [](){
159 gBuckets = new EmulatedFutexBucket[kNumBuckets];
161 uint64_t mixedBits = folly::hash::twang_mix64(
162 reinterpret_cast<uintptr_t>(addr));
163 return gBuckets[mixedBits % kNumBuckets];
167 EmulatedFutexBucket* EmulatedFutexBucket::gBuckets;
168 std::once_flag EmulatedFutexBucket::gBucketInit;
170 int emulatedFutexWake(void* addr, int count, uint32_t waitMask) {
171 auto& bucket = EmulatedFutexBucket::bucketFor(addr);
172 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
175 for (auto iter = bucket.waiters_.begin();
176 numAwoken < count && iter != bucket.waiters_.end(); ) {
178 auto& node = *iter++;
179 if (node.addr_ == addr && (node.waitMask_ & waitMask) != 0) {
182 // we unlink, but waiter destroys the node
183 bucket.waiters_.erase(current);
185 std::unique_lock<std::mutex> nodeLock(node.mutex_);
186 node.signaled_ = true;
187 node.cond_.notify_one();
193 FutexResult emulatedFutexWaitImpl(
196 time_point<system_clock>* absSystemTime,
197 time_point<steady_clock>* absSteadyTime,
199 auto& bucket = EmulatedFutexBucket::bucketFor(addr);
200 EmulatedFutexWaitNode node(addr, waitMask);
203 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
206 memcpy(&actual, addr, sizeof(uint32_t));
207 if (actual != expected) {
208 return FutexResult::VALUE_CHANGED;
211 bucket.waiters_.push_back(node);
212 } // bucketLock scope
214 std::cv_status status = std::cv_status::no_timeout;
216 std::unique_lock<std::mutex> nodeLock(node.mutex_);
217 while (!node.signaled_ && status != std::cv_status::timeout) {
218 if (absSystemTime != nullptr) {
219 status = node.cond_.wait_until(nodeLock, *absSystemTime);
220 } else if (absSteadyTime != nullptr) {
221 status = node.cond_.wait_until(nodeLock, *absSteadyTime);
223 node.cond_.wait(nodeLock);
228 if (status == std::cv_status::timeout) {
229 // it's not really a timeout until we unlink the unsignaled node
230 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
231 if (!node.signaled_) {
232 bucket.waiters_.erase(bucket.waiters_.iterator_to(node));
233 return FutexResult::TIMEDOUT;
236 return FutexResult::AWOKEN;
242 /////////////////////////////////
243 // Futex<> specializations
247 Futex<std::atomic>::futexWake(int count, uint32_t wakeMask) {
249 return nativeFutexWake(this, count, wakeMask);
251 return emulatedFutexWake(this, count, wakeMask);
257 Futex<EmulatedFutexAtomic>::futexWake(int count, uint32_t wakeMask) {
258 return emulatedFutexWake(this, count, wakeMask);
263 Futex<std::atomic>::futexWaitImpl(uint32_t expected,
264 time_point<system_clock>* absSystemTime,
265 time_point<steady_clock>* absSteadyTime,
268 return nativeFutexWaitImpl(
269 this, expected, absSystemTime, absSteadyTime, waitMask);
271 return emulatedFutexWaitImpl(
272 this, expected, absSystemTime, absSteadyTime, waitMask);
278 Futex<EmulatedFutexAtomic>::futexWaitImpl(
280 time_point<system_clock>* absSystemTime,
281 time_point<steady_clock>* absSteadyTime,
283 return emulatedFutexWaitImpl(
284 this, expected, absSystemTime, absSteadyTime, waitMask);
287 }} // namespace folly::detail