folly/SmallLocks.h

   1 /*
   2  * Copyright 2015 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef FOLLY_SMALLLOCKS_H_
  18 #define FOLLY_SMALLLOCKS_H_
  19
  20 /*
  21  * This header defines a few very small mutex types.  These are useful
  22  * in highly memory-constrained environments where contention is
  23  * unlikely.
  24  *
  25  * Note: these locks are for use when you aren't likely to contend on
  26  * the critical section, or when the critical section is incredibly
  27  * small.  Given that, both of the locks defined in this header are
  28  * inherently unfair: that is, the longer a thread is waiting, the
  29  * longer it waits between attempts to acquire, so newer waiters are
  30  * more likely to get the mutex.  For the intended use-case this is
  31  * fine.
  32  *
  33  * @author Keith Adams <kma@fb.com>
  34  * @author Jordan DeLong <delong.j@fb.com>
  35  */
  36
  37 #include <array>
  38 #include <cinttypes>
  39 #include <type_traits>
  40 #include <ctime>
  41 #include <boost/noncopyable.hpp>
  42 #include <cstdlib>
  43 #include <pthread.h>
  44 #include <mutex>
  45 #include <atomic>
  46
  47 #include <glog/logging.h>
  48 #include <folly/Portability.h>
  49
  50 #if !FOLLY_X64 && !FOLLY_A64
  51 # error "SmallLocks.h is currently x64 and aarch64 only."
  52 #endif
  53
  54 namespace folly {
  55
  56 //////////////////////////////////////////////////////////////////////
  57
  58 namespace detail {
  59
  60   /*
  61    * A helper object for the contended case. Starts off with eager
  62    * spinning, and falls back to sleeping for small quantums.
  63    */
  64   class Sleeper {
  65     static const uint32_t kMaxActiveSpin = 4000;
  66
  67     uint32_t spinCount;
  68
  69   public:
  70     Sleeper() : spinCount(0) {}
  71
  72     void wait() {
  73       if (spinCount < kMaxActiveSpin) {
  74         ++spinCount;
  75         asm_volatile_pause();
  76       } else {
  77         /*
  78          * Always sleep 0.5ms, assuming this will make the kernel put
  79          * us down for whatever its minimum timer resolution is (in
  80          * linux this varies by kernel version from 1ms to 10ms).
  81          */
  82         struct timespec ts = { 0, 500000 };
  83         nanosleep(&ts, nullptr);
  84       }
  85     }
  86   };
  87
  88 }
  89
  90 //////////////////////////////////////////////////////////////////////
  91
  92 /*
  93  * A really, *really* small spinlock for fine-grained locking of lots
  94  * of teeny-tiny data.
  95  *
  96  * Zero initializing these is guaranteed to be as good as calling
  97  * init(), since the free state is guaranteed to be all-bits zero.
  98  *
  99  * This class should be kept a POD, so we can used it in other packed
 100  * structs (gcc does not allow __attribute__((__packed__)) on structs that
 101  * contain non-POD data).  This means avoid adding a constructor, or
 102  * making some members private, etc.
 103  */
 104 struct MicroSpinLock {
 105   enum { FREE = 0, LOCKED = 1 };
 106   // lock_ can't be std::atomic<> to preserve POD-ness.
 107   uint8_t lock_;
 108
 109   // Initialize this MSL.  It is unnecessary to call this if you
 110   // zero-initialize the MicroSpinLock.
 111   void init() {
 112     payload()->store(FREE);
 113   }
 114
 115   bool try_lock() {
 116     return cas(FREE, LOCKED);
 117   }
 118
 119   void lock() {
 120     detail::Sleeper sleeper;
 121     do {
 122       while (payload()->load() != FREE) {
 123         sleeper.wait();
 124       }
 125     } while (!try_lock());
 126     DCHECK(payload()->load() == LOCKED);
 127   }
 128
 129   void unlock() {
 130     CHECK(payload()->load() == LOCKED);
 131     payload()->store(FREE, std::memory_order_release);
 132   }
 133
 134  private:
 135   std::atomic<uint8_t>* payload() {
 136     return reinterpret_cast<std::atomic<uint8_t>*>(&this->lock_);
 137   }
 138
 139   bool cas(uint8_t compare, uint8_t newVal) {
 140     return std::atomic_compare_exchange_strong_explicit(payload(), &compare, newVal,
 141                                                         std::memory_order_acquire,
 142                                                         std::memory_order_relaxed);
 143   }
 144 };
 145
 146 //////////////////////////////////////////////////////////////////////
 147
 148 /*
 149  * Spin lock on a single bit in an integral type.  You can use this
 150  * with 16, 32, or 64-bit integral types.
 151  *
 152  * This is useful if you want a small lock and already have an int
 153  * with a bit in it that you aren't using.  But note that it can't be
 154  * as small as MicroSpinLock (1 byte), if you don't already have a
 155  * convenient int with an unused bit lying around to put it on.
 156  *
 157  * To construct these, either use init() or zero initialize.  We don't
 158  * have a real constructor because we want this to be a POD type so we
 159  * can put it into packed structs.
 160  */
 161 template<class IntType, int Bit = sizeof(IntType) * 8 - 1>
 162 struct PicoSpinLock {
 163   // Internally we deal with the unsigned version of the type.
 164   typedef typename std::make_unsigned<IntType>::type UIntType;
 165
 166   static_assert(std::is_integral<IntType>::value,
 167                 "PicoSpinLock needs an integral type");
 168   static_assert(sizeof(IntType) == 2 || sizeof(IntType) == 4 ||
 169                   sizeof(IntType) == 8,
 170                 "PicoSpinLock can't work on integers smaller than 2 bytes");
 171
 172  public:
 173   static const UIntType kLockBitMask_ = UIntType(1) << Bit;
 174   UIntType lock_;
 175
 176   /*
 177    * You must call this function before using this class, if you
 178    * default constructed it.  If you zero-initialized it you can
 179    * assume the PicoSpinLock is in a valid unlocked state with
 180    * getData() == 0.
 181    *
 182    * (This doesn't use a constructor because we want to be a POD.)
 183    */
 184   void init(IntType initialValue = 0) {
 185     CHECK(!(initialValue & kLockBitMask_));
 186     lock_ = initialValue;
 187   }
 188
 189   /*
 190    * Returns the value of the integer we using for our lock, except
 191    * with the bit we are using as a lock cleared, regardless of
 192    * whether the lock is held.
 193    *
 194    * It is 'safe' to call this without holding the lock.  (As in: you
 195    * get the same guarantees for simultaneous accesses to an integer
 196    * as you normally get.)
 197    */
 198   IntType getData() const {
 199     return static_cast<IntType>(lock_ & ~kLockBitMask_);
 200   }
 201
 202   /*
 203    * Set the value of the other bits in our integer.
 204    *
 205    * Don't use this when you aren't holding the lock, unless it can be
 206    * guaranteed that no other threads may be trying to use this.
 207    */
 208   void setData(IntType w) {
 209     CHECK(!(w & kLockBitMask_));
 210     lock_ = (lock_ & kLockBitMask_) | w;
 211   }
 212
 213   /*
 214    * Try to get the lock without blocking: returns whether or not we
 215    * got it.
 216    */
 217   bool try_lock() const {
 218     bool ret = false;
 219
 220 #if FOLLY_X64
 221 #define FB_DOBTS(size)                                  \
 222   asm volatile("lock; bts" #size " %1, (%2); setnc %0"  \
 223                : "=r" (ret)                             \
 224                : "i" (Bit),                             \
 225                  "r" (&lock_)                           \
 226                : "memory", "flags")
 227
 228     switch (sizeof(IntType)) {
 229     case 2: FB_DOBTS(w); break;
 230     case 4: FB_DOBTS(l); break;
 231     case 8: FB_DOBTS(q); break;
 232     }
 233
 234 #undef FB_DOBTS
 235 #elif FOLLY_A64
 236     ret = __atomic_fetch_or(&lock_, 1 << Bit, __ATOMIC_SEQ_CST);
 237 #else
 238 #error "x86 aarch64 only"
 239 #endif
 240
 241     return ret;
 242   }
 243
 244   /*
 245    * Block until we can acquire the lock.  Uses Sleeper to wait.
 246    */
 247   void lock() const {
 248     detail::Sleeper sleeper;
 249     while (!try_lock()) {
 250       sleeper.wait();
 251     }
 252   }
 253
 254   /*
 255    * Release the lock, without changing the value of the rest of the
 256    * integer.
 257    */
 258   void unlock() const {
 259 #if FOLLY_X64
 260 #define FB_DOBTR(size)                          \
 261   asm volatile("lock; btr" #size " %0, (%1)"    \
 262                :                                \
 263                : "i" (Bit),                     \
 264                  "r" (&lock_)                   \
 265                : "memory", "flags")
 266
 267
 268     // Reads and writes can not be reordered wrt locked instructions,
 269     // so we don't need a memory fence here.
 270     switch (sizeof(IntType)) {
 271     case 2: FB_DOBTR(w); break;
 272     case 4: FB_DOBTR(l); break;
 273     case 8: FB_DOBTR(q); break;
 274     }
 275
 276 #undef FB_DOBTR
 277 #elif FOLLY_A64
 278     __atomic_fetch_and(&lock_, ~(1 << Bit), __ATOMIC_SEQ_CST);
 279 #else
 280 # error "x64 aarch64 only"
 281 #endif
 282   }
 283 };
 284
 285 //////////////////////////////////////////////////////////////////////
 286
 287 /**
 288  * Array of spinlocks where each one is padded to prevent false sharing.
 289  * Useful for shard-based locking implementations in environments where
 290  * contention is unlikely.
 291  */
 292
 293 // TODO: generate it from configure (`getconf LEVEL1_DCACHE_LINESIZE`)
 294 #define FOLLY_CACHE_LINE_SIZE 64
 295
 296 template <class T, size_t N>
 297 struct SpinLockArray {
 298   T& operator[](size_t i) {
 299     return data_[i].lock;
 300   }
 301
 302   const T& operator[](size_t i) const {
 303     return data_[i].lock;
 304   }
 305
 306   constexpr size_t size() const { return N; }
 307
 308  private:
 309   struct PaddedSpinLock {
 310     PaddedSpinLock() : lock() {}
 311     T lock;
 312     char padding[FOLLY_CACHE_LINE_SIZE - sizeof(T)];
 313   };
 314   static_assert(sizeof(PaddedSpinLock) == FOLLY_CACHE_LINE_SIZE,
 315                 "Invalid size of PaddedSpinLock");
 316
 317   // Check if T can theoretically cross a cache line.
 318   // NOTE: It should be alignof(std::max_align_t), but max_align_t
 319   // isn't supported by gcc 4.6.2.
 320   static_assert(alignof(MaxAlign) > 0 &&
 321                 FOLLY_CACHE_LINE_SIZE % alignof(MaxAlign) == 0 &&
 322                 sizeof(T) <= alignof(MaxAlign),
 323                 "T can cross cache line boundaries");
 324
 325   char padding_[FOLLY_CACHE_LINE_SIZE];
 326   std::array<PaddedSpinLock, N> data_;
 327 } __attribute__((__aligned__));
 328
 329 //////////////////////////////////////////////////////////////////////
 330
 331 typedef std::lock_guard<MicroSpinLock> MSLGuard;
 332
 333 //////////////////////////////////////////////////////////////////////
 334
 335 }
 336
 337 #endif