minor tweak to MicroLock slow path
author Nathan Bronson <ngbronson@fb.com>
Mon, 7 Mar 2016 21:36:59 +0000 (13:36 -0800)
committer Facebook Github Bot 4 <facebook-github-bot-4-bot@fb.com>
Mon, 7 Mar 2016 21:50:22 +0000 (13:50 -0800)
Summary: This diff uses the x86 "pause" instruction or its equivalent when
spinning in the MicroLock slow loop.  This gives a hint to the processor
that it should devote more resources to the other execution contexts
sharing the same core.  This diff also removes an mfence on x86 by using
a slightly stronger memory ordering on the preceding compare_exchange_weak,
and switches to a more portable way of invoking sched_yield().

Reviewed By: dcolascione

Differential Revision: D3018568

fb-gh-sync-id: 02e0ab3a9d9bb9901eddf54e45b71cbb7758a227
shipit-source-id: 02e0ab3a9d9bb9901eddf54e45b71cbb7758a227

folly/MicroLock.cpp
folly/MicroLock.h

index d6656dce4c32c2fc374eb596200c508a0215a04b..dd649da1b431e9a25c027c98838e78691eb62a55 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <folly/MicroLock.h>
+#include <thread>
 
 namespace folly {
 
@@ -45,7 +46,10 @@ retry:
       }
       (void)wordPtr->futexWait(newWord, slotHeldBit);
     } else if (spins > maxSpins) {
-      sched_yield();
+      // sched_yield(), but more portable
+      std::this_thread::yield();
+    } else {
+      folly::asm_pause();
     }
     oldWord = wordPtr->load(std::memory_order_relaxed);
     goto retry;
@@ -54,14 +58,9 @@ retry:
   newWord = oldWord | slotHeldBit;
   if (!wordPtr->compare_exchange_weak(oldWord,
                                       newWord,
-                                      std::memory_order_relaxed,
+                                      std::memory_order_acquire,
                                       std::memory_order_relaxed)) {
     goto retry;
   }
-
-  // Locks are traditionally memory barriers, so we emit a full fence
-  // even though we were happy using relaxed atomics for the
-  // lock itself.
-  std::atomic_thread_fence(std::memory_order_seq_cst);
 }
 }
index 3ff158b8728570c968f3df9f022ec7842996feca..07aa40b8df592a08ac557fc3c47e5b1e649d5443 100644 (file)
@@ -193,7 +193,7 @@ void MicroLockBase<MaxSpins, MaxYields>::lock(unsigned slot) {
                                      oldWord | heldBit(slot),
                                      std::memory_order_acquire,
                                      std::memory_order_relaxed)) {
-    // Fast uncontended case: seq_cst above is our memory barrier
+    // Fast uncontended case: memory_order_acquire above is our barrier
   } else {
     // lockSlowPath doesn't have any slot-dependent computation; it
     // just shifts the input bit.  Make sure its shifting produces the