folly/SmallLocks.h

   1 /*
   2  * Copyright 2012 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef FOLLY_SMALLLOCKS_H_
  18 #define FOLLY_SMALLLOCKS_H_
  19
  20 /*
  21  * This header defines a few very small mutex types.  These are useful
  22  * in highly memory-constrained environments where contention is
  23  * unlikely.
  24  *
  25  * Note: these locks are for use when you aren't likely to contend on
  26  * the critical section, or when the critical section is incredibly
  27  * small.  Given that, both of the locks defined in this header are
  28  * inherently unfair: that is, the longer a thread is waiting, the
  29  * longer it waits between attempts to acquire, so newer waiters are
  30  * more likely to get the mutex.  For the intended use-case this is
  31  * fine.
  32  *
  33  * @author Keith Adams <kma@fb.com>
  34  * @author Jordan DeLong <delong.j@fb.com>
  35  */
  36
  37 #include <array>
  38 #include <cinttypes>
  39 #include <type_traits>
  40 #include <ctime>
  41 #include <boost/noncopyable.hpp>
  42 #include <cstdlib>
  43 #include <pthread.h>
  44 #include <mutex>
  45
  46 #include <glog/logging.h>
  47
  48 #ifndef __x86_64__
  49 # error "SmallLocks.h is currently x64-only."
  50 #endif
  51
  52 namespace folly {
  53
  54 //////////////////////////////////////////////////////////////////////
  55
  56 namespace detail {
  57
  58   /*
  59    * A helper object for the condended case. Starts off with eager
  60    * spinning, and falls back to sleeping for small quantums.
  61    */
  62   class Sleeper {
  63     static const uint32_t kMaxActiveSpin = 4000;
  64
  65     uint32_t spinCount;
  66
  67   public:
  68     Sleeper() : spinCount(0) {}
  69
  70     void wait() {
  71       if (spinCount < kMaxActiveSpin) {
  72         ++spinCount;
  73         asm volatile("pause");
  74       } else {
  75         /*
  76          * Always sleep 0.5ms, assuming this will make the kernel put
  77          * us down for whatever its minimum timer resolution is (in
  78          * linux this varies by kernel version from 1ms to 10ms).
  79          */
  80         struct timespec ts = { 0, 500000 };
  81         nanosleep(&ts, NULL);
  82       }
  83     }
  84   };
  85
  86 }
  87
  88 //////////////////////////////////////////////////////////////////////
  89
  90 /*
  91  * A really, *really* small spinlock for fine-grained locking of lots
  92  * of teeny-tiny data.
  93  *
  94  * Zero initializing these is guaranteed to be as good as calling
  95  * init(), since the free state is guaranteed to be all-bits zero.
  96  *
  97  * This class should be kept a POD, so we can used it in other packed
  98  * structs (gcc does not allow __attribute__((packed)) on structs that
  99  * contain non-POD data).  This means avoid adding a constructor, or
 100  * making some members private, etc.
 101  */
 102 struct MicroSpinLock {
 103   enum { FREE = 0, LOCKED = 1 };
 104   uint8_t lock_;
 105
 106   /*
 107    * Atomically move lock_ from "compare" to "newval". Return boolean
 108    * success. Do not play on or around.
 109    */
 110   bool cas(uint8_t compare, uint8_t newVal) {
 111     bool out;
 112     asm volatile("lock; cmpxchgb %2, (%3);"
 113                  "setz %0;"
 114                  : "=r" (out)
 115                  : "a" (compare), // cmpxchgb constrains this to be in %al
 116                    "q" (newVal),  // Needs to be byte-accessible
 117                    "r" (&lock_)
 118                  : "memory", "flags");
 119     return out;
 120   }
 121
 122   // Initialize this MSL.  It is unnecessary to call this if you
 123   // zero-initialize the MicroSpinLock.
 124   void init() {
 125     lock_ = FREE;
 126   }
 127
 128   bool try_lock() {
 129     return cas(FREE, LOCKED);
 130   }
 131
 132   void lock() {
 133     detail::Sleeper sleeper;
 134     do {
 135       while (lock_ != FREE) {
 136         asm volatile("" : : : "memory");
 137         sleeper.wait();
 138       }
 139     } while (!try_lock());
 140     DCHECK(lock_ == LOCKED);
 141   }
 142
 143   void unlock() {
 144     CHECK(lock_ == LOCKED);
 145     asm volatile("" : : : "memory");
 146     lock_ = FREE; // release barrier on x86
 147   }
 148 };
 149
 150 //////////////////////////////////////////////////////////////////////
 151
 152 /*
 153  * Spin lock on a single bit in an integral type.  You can use this
 154  * with 16, 32, or 64-bit integral types.
 155  *
 156  * This is useful if you want a small lock and already have an int
 157  * with a bit in it that you aren't using.  But note that it can't be
 158  * as small as MicroSpinLock (1 byte), if you don't already have a
 159  * convenient int with an unused bit lying around to put it on.
 160  *
 161  * To construct these, either use init() or zero initialize.  We don't
 162  * have a real constructor because we want this to be a POD type so we
 163  * can put it into packed structs.
 164  */
 165 template<class IntType, int Bit = sizeof(IntType) * 8 - 1>
 166 struct PicoSpinLock {
 167   // Internally we deal with the unsigned version of the type.
 168   typedef typename std::make_unsigned<IntType>::type UIntType;
 169
 170   static_assert(std::is_integral<IntType>::value,
 171                 "PicoSpinLock needs an integral type");
 172   static_assert(sizeof(IntType) == 2 || sizeof(IntType) == 4 ||
 173                   sizeof(IntType) == 8,
 174                 "PicoSpinLock can't work on integers smaller than 2 bytes");
 175
 176  public:
 177   static const UIntType kLockBitMask_ = UIntType(1) << Bit;
 178   UIntType lock_;
 179
 180   /*
 181    * You must call this function before using this class, if you
 182    * default constructed it.  If you zero-initialized it you can
 183    * assume the PicoSpinLock is in a valid unlocked state with
 184    * getData() == 0.
 185    *
 186    * (This doesn't use a constructor because we want to be a POD.)
 187    */
 188   void init(IntType initialValue = 0) {
 189     CHECK(!(initialValue & kLockBitMask_));
 190     lock_ = initialValue;
 191   }
 192
 193   /*
 194    * Returns the value of the integer we using for our lock, except
 195    * with the bit we are using as a lock cleared, regardless of
 196    * whether the lock is held.
 197    *
 198    * It is 'safe' to call this without holding the lock.  (As in: you
 199    * get the same guarantees for simultaneous accesses to an integer
 200    * as you normally get.)
 201    */
 202   IntType getData() const {
 203     return static_cast<IntType>(lock_ & ~kLockBitMask_);
 204   }
 205
 206   /*
 207    * Set the value of the other bits in our integer.
 208    *
 209    * Don't use this when you aren't holding the lock, unless it can be
 210    * guaranteed that no other threads may be trying to use this.
 211    */
 212   void setData(IntType w) {
 213     CHECK(!(w & kLockBitMask_));
 214     lock_ = (lock_ & kLockBitMask_) | w;
 215   }
 216
 217   /*
 218    * Try to get the lock without blocking: returns whether or not we
 219    * got it.
 220    */
 221   bool try_lock() const {
 222     bool ret = false;
 223
 224 #define FB_DOBTS(size)                                  \
 225   asm volatile("lock; bts" #size " %1, (%2); setnc %0"  \
 226                : "=r" (ret)                             \
 227                : "i" (Bit),                             \
 228                  "r" (&lock_)                           \
 229                : "memory", "flags")
 230
 231     switch (sizeof(IntType)) {
 232     case 2: FB_DOBTS(w); break;
 233     case 4: FB_DOBTS(l); break;
 234     case 8: FB_DOBTS(q); break;
 235     }
 236
 237 #undef FB_DOBTS
 238
 239     return ret;
 240   }
 241
 242   /*
 243    * Block until we can acquire the lock.  Uses Sleeper to wait.
 244    */
 245   void lock() const {
 246     detail::Sleeper sleeper;
 247     while (!try_lock()) {
 248       sleeper.wait();
 249     }
 250   }
 251
 252   /*
 253    * Release the lock, without changing the value of the rest of the
 254    * integer.
 255    */
 256   void unlock() const {
 257 #define FB_DOBTR(size)                          \
 258   asm volatile("lock; btr" #size " %0, (%1)"    \
 259                :                                \
 260                : "i" (Bit),                     \
 261                  "r" (&lock_)                   \
 262                : "memory", "flags")
 263
 264
 265     // Reads and writes can not be reordered wrt locked instructions,
 266     // so we don't need a memory fence here.
 267     switch (sizeof(IntType)) {
 268     case 2: FB_DOBTR(w); break;
 269     case 4: FB_DOBTR(l); break;
 270     case 8: FB_DOBTR(q); break;
 271     }
 272
 273 #undef FB_DOBTR
 274   }
 275 };
 276
 277 //////////////////////////////////////////////////////////////////////
 278
 279 /**
 280  * Array of spinlocks where each one is padded to prevent false sharing.
 281  * Useful for shard-based locking implementations in environments where
 282  * contention is unlikely.
 283  */
 284
 285 // TODO: generate it from configure (`getconf LEVEL1_DCACHE_LINESIZE`)
 286 #define FOLLY_CACHE_LINE_SIZE 64
 287
 288 template <class T, size_t N>
 289 struct SpinLockArray {
 290   T& operator[](size_t i) {
 291     return data_[i].lock;
 292   }
 293
 294   const T& operator[](size_t i) const {
 295     return data_[i].lock;
 296   }
 297
 298   constexpr size_t size() const { return N; }
 299
 300  private:
 301   struct PaddedSpinLock {
 302     PaddedSpinLock() : lock() { }
 303     T lock;
 304     char padding[FOLLY_CACHE_LINE_SIZE - sizeof(T)];
 305   };
 306   static_assert(sizeof(PaddedSpinLock) == FOLLY_CACHE_LINE_SIZE,
 307                 "Invalid size of PaddedSpinLock");
 308
 309   // Check if T can theoretically cross a cache line.
 310   // NOTE: It should be alignof(std::max_align_t), but max_align_t
 311   // isn't supported by gcc 4.6.2.
 312   struct MaxAlign { char c; } __attribute__((aligned));
 313   static_assert(alignof(MaxAlign) > 0 &&
 314                 FOLLY_CACHE_LINE_SIZE % alignof(MaxAlign) == 0 &&
 315                 sizeof(T) <= alignof(MaxAlign),
 316                 "T can cross cache line boundaries");
 317
 318   char padding_[FOLLY_CACHE_LINE_SIZE];
 319   std::array<PaddedSpinLock, N> data_;
 320 } __attribute__((aligned));
 321
 322 //////////////////////////////////////////////////////////////////////
 323
 324 typedef std::lock_guard<MicroSpinLock> MSLGuard;
 325
 326 //////////////////////////////////////////////////////////////////////
 327
 328 }
 329
 330 #endif