/*
 * Copyright 2017 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * N.B. You most likely do _not_ want to use PicoSpinLock or any other
 * kind of spinlock.  Consider MicroLock instead.
 *
 * In short, spinlocks in preemptive multi-tasking operating systems
 * have serious problems and fast mutexes like std::mutex are almost
 * certainly the better choice, because letting the OS scheduler put a
 * thread to sleep is better for system responsiveness and throughput
 * than wasting a timeslice repeatedly querying a lock held by a
 * thread that's blocked, and you can't prevent userspace
 * programs blocking.
 *
 * Spinlocks in an operating system kernel make much more sense than
 * they do in userspace.
 */
#ifndef FOLLY_PICO_SPIN_LOCK_H_
#define FOLLY_PICO_SPIN_LOCK_H_

/*
 * @author Keith Adams <kma@fb.com>
 * @author Jordan DeLong <delong.j@fb.com>
 */
#include <atomic>
#include <type_traits>

#include <glog/logging.h>

#include <folly/Portability.h>
#include <folly/detail/Sleeper.h>

#if !FOLLY_X64 && !FOLLY_AARCH64 && !FOLLY_PPC64
#error "PicoSpinLock.h is currently x64, aarch64 and ppc64 only."
#endif

namespace folly {
/*
 * Spin lock on a single bit in an integral type.  You can use this
 * with 16, 32, or 64-bit integral types.
 *
 * This is useful if you want a small lock and already have an int
 * with a bit in it that you aren't using.  But note that it can't be
 * as small as MicroSpinLock (1 byte), if you don't already have a
 * convenient int with an unused bit lying around to put it on.
 *
 * To construct these, either use init() or zero initialize.  We don't
 * have a real constructor because we want this to be a POD type so we
 * can put it into packed structs.
 */
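/*
 * A minimal usage sketch (illustrative only; assumes a 32-bit data word with
 * the default lock bit, i.e. the most significant bit of the integer):
 *
 *   folly::PicoSpinLock<uint32_t> lk = {};  // zero-initialized => unlocked
 *   lk.lock();
 *   lk.setData(17);                // store into the non-lock bits while held
 *   uint32_t v = lk.getData();     // read the data bits back
 *   lk.unlock();
 */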
template <class IntType, int Bit = sizeof(IntType) * 8 - 1>
struct PicoSpinLock {
  // Internally we deal with the unsigned version of the type.
  typedef typename std::make_unsigned<IntType>::type UIntType;

  static_assert(std::is_integral<IntType>::value,
                "PicoSpinLock needs an integral type");
  static_assert(sizeof(IntType) == 2 || sizeof(IntType) == 4 ||
                    sizeof(IntType) == 8,
                "PicoSpinLock can't work on integers smaller than 2 bytes");

 public:
  static const UIntType kLockBitMask_ = UIntType(1) << Bit;
  mutable UIntType lock_;
  /*
   * You must call this function before using this class, if you
   * default constructed it.  If you zero-initialized it you can
   * assume the PicoSpinLock is in a valid unlocked state with
   * getData() == 0.
   *
   * (This doesn't use a constructor because we want to be a POD.)
   */
  void init(IntType initialValue = 0) {
    CHECK(!(initialValue & kLockBitMask_));
    reinterpret_cast<std::atomic<UIntType>*>(&lock_)->store(
        UIntType(initialValue), std::memory_order_release);
  }
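  // Illustrative sketch of the two initialization paths described above
  // (names and values are examples, not part of this header):
  //
  //   PicoSpinLock<uint64_t> a = {};  // zero-initialized: unlocked, data == 0
  //   PicoSpinLock<uint64_t> b;       // default-constructed: indeterminate,
  //   b.init(42);                     // so init() must run before first use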
  /*
   * Returns the value of the integer we are using for our lock, except
   * with the bit we are using as a lock cleared, regardless of
   * whether the lock is held.
   *
   * It is 'safe' to call this without holding the lock.  (As in: you
   * get the same guarantees for simultaneous accesses to an integer
   * as you normally get.)
   */
  IntType getData() const {
    auto res = reinterpret_cast<std::atomic<UIntType>*>(&lock_)->load(
                   std::memory_order_relaxed) &
        ~kLockBitMask_;
    return res;
  }
  /*
   * Set the value of the other bits in our integer.
   *
   * Don't use this when you aren't holding the lock, unless it can be
   * guaranteed that no other threads may be trying to use this.
   */
  void setData(IntType w) {
    CHECK(!(w & kLockBitMask_));
    auto l = reinterpret_cast<std::atomic<UIntType>*>(&lock_);
    l->store(
        (l->load(std::memory_order_relaxed) & kLockBitMask_) | w,
        std::memory_order_relaxed);
  }
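  // Sketch of how the data bits and the lock bit share the word (assumes the
  // default Bit, i.e. the most significant bit; values are illustrative):
  //
  //   PicoSpinLock<uint16_t> s = {};   // 15 usable data bits, 1 lock bit
  //   s.lock();
  //   s.setData(0x7FFF);               // OK: does not touch the lock bit
  //   // s.setData(0x8000);            // would CHECK-fail: collides with bit 15
  //   s.unlock();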
  /*
   * Try to get the lock without blocking: returns whether or not we
   * got it.
   */
  bool try_lock() const {
    bool ret = false;
#if defined(FOLLY_SANITIZE_THREAD)
    // TODO: Might be able to fully move to std::atomic when gcc emits lock btr:
    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49244
    // fetch_or returns the previous value, so the lock was free iff the lock
    // bit was clear before our read-modify-write.
    ret =
        !(reinterpret_cast<std::atomic<UIntType>*>(&lock_)->fetch_or(
              kLockBitMask_, std::memory_order_acquire) &
          kLockBitMask_);
#elif _MSC_VER
    switch (sizeof(IntType)) {
      case 2:
        // There is no _interlockedbittestandset16 for some reason :(
        ret = _InterlockedOr16(
            (volatile short*)&lock_, (short)kLockBitMask_) & kLockBitMask_;
        break;
      case 4:
        ret = _interlockedbittestandset((volatile long*)&lock_, Bit);
        break;
      case 8:
        ret = _interlockedbittestandset64((volatile long long*)&lock_, Bit);
        break;
    }
    // The intrinsics return the previous bit value, so invert: we acquired
    // the lock only if the bit was previously clear.
    return !ret;
#elif FOLLY_X64
#define FB_DOBTS(size)                                 \
  asm volatile("lock; bts" #size " %1, (%2); setnc %0" \
               : "=r"(ret)                             \
               : "i"(Bit), "r"(&lock_)                 \
               : "memory", "flags")

    // bts copies the addressed bit into CF and then sets it; setnc therefore
    // leaves ret == 1 exactly when the lock bit was previously clear.
    switch (sizeof(IntType)) {
      case 2: FB_DOBTS(w); break;
      case 4: FB_DOBTS(l); break;
      case 8: FB_DOBTS(q); break;
    }

#undef FB_DOBTS
#elif FOLLY_AARCH64
    // Same idea as the sanitizer path, via the gcc atomic builtin: the lock
    // was free iff the lock bit was clear before the fetch_or.
    ret =
        !(__atomic_fetch_or(&lock_, kLockBitMask_, __ATOMIC_SEQ_CST) &
          kLockBitMask_);
#elif FOLLY_PPC64
#define FB_DOBTS(size)                                 \
  asm volatile("\teieio\n"                             \
               "\tl" #size "arx 14,0,%[lockPtr]\n"     \
               "\tli 15,1\n"                           \
               "\tsldi 15,15,%[bit]\n"                 \
               "\tand. 16,15,14\n"                     \
               "\tbne 0f\n"                            \
               "\tor 14,14,15\n"                       \
               "\tst" #size "cx. 14,0,%[lockPtr]\n"    \
               "\tbne 0f\n"                            \
               "\tori %[output],%[output],1\n"         \
               "\tisync\n"                             \
               "0:\n"                                  \
               : [output] "+r"(ret)                    \
               : [lockPtr] "r"(&lock_),                \
                 [bit] "i"(Bit)                        \
               : "cr0", "memory", "r14", "r15", "r16")

    switch (sizeof(IntType)) {
      case 2: FB_DOBTS(h); break;
      case 4: FB_DOBTS(w); break;
      case 8: FB_DOBTS(d); break;
    }

#undef FB_DOBTS
#else
#error "x86 aarch64 ppc64 only"
#endif

    return ret;
  }
  /*
   * Block until we can acquire the lock.  Uses Sleeper to wait.
   */
  void lock() const {
    detail::Sleeper sleeper;
    while (!try_lock()) {
      sleeper.wait();
    }
  }
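  // Since the type provides lock(), try_lock() and unlock(), it can also be
  // used with the standard RAII guards; a hypothetical example (needs
  // <mutex> for std::lock_guard):
  //
  //   PicoSpinLock<uint32_t> lk = {};
  //   {
  //     std::lock_guard<PicoSpinLock<uint32_t>> guard(lk);
  //     lk.setData(7);  // safe: guard holds the lock for this scope
  //   }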
  /*
   * Release the lock, without changing the value of the rest of the
   * integer.
   */
  void unlock() const {
#ifdef _MSC_VER
    switch (sizeof(IntType)) {
      case 2:
        // There is no _interlockedbittestandreset16 for some reason :(
        _InterlockedAnd16((volatile short*)&lock_, (short)~kLockBitMask_);
        break;
      case 4:
        _interlockedbittestandreset((volatile long*)&lock_, Bit);
        break;
      case 8:
        _interlockedbittestandreset64((volatile long long*)&lock_, Bit);
        break;
    }
#elif FOLLY_X64
#define FB_DOBTR(size)                       \
  asm volatile("lock; btr" #size " %0, (%1)" \
               :                             \
               : "i"(Bit), "r"(&lock_)       \
               : "memory", "flags")

    // Reads and writes can not be reordered wrt locked instructions,
    // so we don't need a memory fence here.
    switch (sizeof(IntType)) {
      case 2: FB_DOBTR(w); break;
      case 4: FB_DOBTR(l); break;
      case 8: FB_DOBTR(q); break;
    }

#undef FB_DOBTR
#elif FOLLY_AARCH64
    // Atomically clear only the lock bit, leaving the data bits untouched.
    __atomic_fetch_and(&lock_, ~kLockBitMask_, __ATOMIC_SEQ_CST);
#elif FOLLY_PPC64
#define FB_DOBTR(size)                                 \
  asm volatile("\teieio\n"                             \
               "0:  l" #size "arx 14,0,%[lockPtr]\n"   \
               "\tli 15,1\n"                           \
               "\tsldi 15,15,%[bit]\n"                 \
               "\txor 14,14,15\n"                      \
               "\tst" #size "cx. 14,0,%[lockPtr]\n"    \
               "\tbne 0b\n"                            \
               :                                       \
               : [lockPtr] "r"(&lock_),                \
                 [bit] "i"(Bit)                        \
               : "cr0", "memory", "r14", "r15")

    switch (sizeof(IntType)) {
      case 2: FB_DOBTR(h); break;
      case 4: FB_DOBTR(w); break;
      case 8: FB_DOBTR(d); break;
    }

#undef FB_DOBTR
#else
#error "x64 aarch64 ppc64 only"
#endif
  }
};

} // namespace folly

#endif