X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FMicroLock.h;h=511d06fa605860755b8f1052a4ebf391e0528346;hb=ba690cad3c4138fce4e55b319237bb9e01ca030f;hp=3ff158b8728570c968f3df9f022ec7842996feca;hpb=2f595596190c768c6c074c93c878ef8b9144fe55;p=folly.git

diff --git a/folly/MicroLock.h b/folly/MicroLock.h
index 3ff158b8..511d06fa 100644
--- a/folly/MicroLock.h
+++ b/folly/MicroLock.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2016 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,6 +22,12 @@
 #include <folly/detail/Futex.h>
 #include <folly/Portability.h>
 
+#if defined(__clang__)
+#define NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address))
+#else
+#define NO_SANITIZE_ADDRESS
+#endif
+
 namespace folly {
 
 /**
@@ -43,11 +49,15 @@ namespace folly {
  * limit yourself to MicroLock slot zero, which will use the two
  * least-significant bits in the bottom byte.
  *
+ * (Note that such a union is safe only because MicroLock is based on
+ * a character type, and even under a strict interpretation of C++'s
+ * aliasing rules, character types may alias anything.)
+ *
  * MicroLock uses a dirty trick: it actually operates on the full
- * word-size, word-aligned bit of memory into which it is embedded.
+ * 32-bit, four-byte-aligned bit of memory into which it is embedded.
  * It never modifies bits outside the ones it's defined to modify, but
- * it _accesses_ all the bits in the word for purposes of
- * futex management.
+ * it _accesses_ all the bits in the 32-bit memory location for
+ * purposes of futex management.
  *
  * The MaxSpins template parameter controls the number of times we
  * spin trying to acquire the lock. MaxYields controls the number of
@@ -76,16 +86,24 @@
  *
  * (The virtual dispatch benchmark is provided for scale.)
  *
- * The contended case for MicroLock is likely to be worse compared to
- * std::mutex than the contended case is. Make sure to benchmark your
- * particular workload.
+ * While the uncontended case for MicroLock is competitive with the
+ * glibc 2.2.0 implementation of std::mutex, std::mutex is likely to be
+ * faster in the contended case, because we need to wake up all waiters
+ * when we release.
+ *
+ * Make sure to benchmark your particular workload.
  *
  */
 class MicroLockCore {
  protected:
+#if defined(__SANITIZE_ADDRESS__) && !defined(__clang__) && \
+    (defined(__GNUC__) || defined(__GNUG__))
+  uint32_t lock_;
+#else
   uint8_t lock_;
-  inline detail::Futex<>* word() const;
+#endif
+  inline detail::Futex<>* word() const; // Well, halfword on 64-bit systems
   inline uint32_t baseShift(unsigned slot) const;
   inline uint32_t heldBit(unsigned slot) const;
   inline uint32_t waitBit(unsigned slot) const;
@@ -96,10 +114,10 @@ class MicroLockCore {
                              unsigned maxYields);
 
  public:
-  inline void unlock(unsigned slot);
+  inline void unlock(unsigned slot) NO_SANITIZE_ADDRESS;
   inline void unlock() { unlock(0); }
-  inline void init(unsigned slot) { lock_ &= ~(3U << (2 * slot)); }
-  inline void init() { init(0); }
+  // Initializes all the slots.
+  inline void init() { lock_ = 0; }
 };
 
 inline detail::Futex<>* MicroLockCore::word() const {
@@ -113,9 +131,8 @@ inline unsigned MicroLockCore::baseShift(unsigned slot) const {
 
   unsigned offset_bytes = (unsigned)((uintptr_t)&lock_ - (uintptr_t)word());
 
-  return kIsLittleEndian
-      ? offset_bytes * CHAR_BIT + slot * 2
-      : CHAR_BIT * (sizeof(uint32_t) - offset_bytes - 1) + slot * 2;
+  return (
+      unsigned)(kIsLittleEndian ? offset_bytes * CHAR_BIT + slot * 2 : CHAR_BIT * (sizeof(uint32_t) - offset_bytes - 1) + slot * 2);
 }
 
 inline uint32_t MicroLockCore::heldBit(unsigned slot) const {
@@ -147,9 +164,9 @@ void MicroLockCore::unlock(unsigned slot) {
 template <unsigned MaxSpins = 1000, unsigned MaxYields = 0>
 class MicroLockBase : public MicroLockCore {
  public:
-  inline void lock(unsigned slot);
+  inline void lock(unsigned slot) NO_SANITIZE_ADDRESS;
   inline void lock() { lock(0); }
-  inline bool try_lock(unsigned slot);
+  inline bool try_lock(unsigned slot) NO_SANITIZE_ADDRESS;
   inline bool try_lock() { return try_lock(0); }
 };
@@ -193,7 +210,7 @@ void MicroLockBase<MaxSpins, MaxYields>::lock(unsigned slot) {
                                     oldWord | heldBit(slot),
                                     std::memory_order_acquire,
                                     std::memory_order_relaxed)) {
-    // Fast uncontended case: seq_cst above is our memory barrier
+    // Fast uncontended case: memory_order_acquire above is our barrier
   } else {
     // lockSlowPath doesn't have any slot-dependent computation; it
    // just shifts the input bit. Make sure its shifting produces the
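
As a quick illustration of the embedding pattern the header comment above describes, a user of this class might pack the one-byte lock into the same word as the data it guards. The sketch below is hypothetical (Datum, payload, reset and touch are not part of folly); it relies only on the MicroLock typedef and the public lock/unlock/init calls visible in this diff.

#include <cstdint>
#include <folly/MicroLock.h>

// Hypothetical user type: the one-byte lock shares its aligned 32-bit word
// with the payload it protects. Only slot 0 is used, so the remaining slots
// (and the upper six bits of the lock byte) stay available.
struct Datum {
  folly::MicroLock lock;  // one byte; slot 0 lives in its two low bits
  uint8_t payload[3];     // the rest of the word the lock's futex reads
};

void reset(Datum& d) {
  d.lock.init();    // the new no-argument init() clears all of the slots
  d.payload[0] = d.payload[1] = d.payload[2] = 0;
}

void touch(Datum& d) {
  d.lock.lock();    // same as d.lock.lock(0): acquire slot 0
  d.payload[0]++;   // data guarded by slot 0
  d.lock.unlock();  // same as d.lock.unlock(0)
}

Because MicroLock has no constructor, init() (or zero-initialization of the containing object) has to run before the first lock() on any slot.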
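
Separately, the slot arithmetic that baseShift() implements can be spot-checked in isolation. The snippet below is an illustrative constexpr mirror of the little-endian branch shown in this diff, for a lock byte at offset 0 of its aligned word; the assumption that the wait bit sits one position above the held bit follows folly's implementation of heldBit()/waitBit(), which this excerpt declares but does not show.

#include <climits>
#include <cstdint>

// Illustrative only, not folly API: recompute the bit positions for a lock
// byte at offset 0 of its word on a little-endian machine.
constexpr unsigned kOffsetBytes = 0;
constexpr unsigned baseShift(unsigned slot) {
  return kOffsetBytes * CHAR_BIT + slot * 2;
}
constexpr uint32_t heldBit(unsigned slot) { return 1u << baseShift(slot); }
constexpr uint32_t waitBit(unsigned slot) { return 1u << (baseShift(slot) + 1); }

// Each slot owns two adjacent bits of the lock byte: slot 0 uses bits 0-1,
// slot 1 uses bits 2-3, and so on, matching the old init(slot) mask
// ~(3U << (2 * slot)).
static_assert(heldBit(0) == 0x1 && waitBit(0) == 0x2, "slot 0 layout");
static_assert(heldBit(1) == 0x4 && waitBit(1) == 0x8, "slot 1 layout");
static_assert(heldBit(3) == 0x40 && waitBit(3) == 0x80, "slot 3 layout");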