ARM: skip cmpxchg failure barrier if ordering is monotonic.
author Tim Northover <tnorthover@apple.com>
Thu, 3 Apr 2014 13:06:54 +0000 (13:06 +0000)
committer Tim Northover <tnorthover@apple.com>
Thu, 3 Apr 2014 13:06:54 +0000 (13:06 +0000)
The terminal barrier of a cmpxchg expansion will be either Acquire or
SequentiallyConsistent. In either case it can be skipped if the
operation has Monotonic requirements on failure.
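
For reference, the source-level construct this affects is a compare-exchange
whose failure ordering is weaker than its success ordering. A minimal C++
sketch (hypothetical function and variable names, not part of this change):

    #include <atomic>

    // A CAS with seq_cst success ordering but relaxed (Monotonic in IR terms)
    // failure ordering: with this change, the failed path on ARM no longer
    // has to execute the trailing dmb.
    bool try_update(std::atomic<int> &val, int expected, int desired) {
      return val.compare_exchange_strong(expected, desired,
                                         std::memory_order_seq_cst,  // success
                                         std::memory_order_relaxed); // failure
    }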

rdar://problem/15996804

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205535 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/ARM/ARMAtomicExpandPass.cpp
test/CodeGen/ARM/atomic-op.ll
test/CodeGen/ARM/atomic-ops-v8.ll

index 33cdda5d6e14b9edbe811408685586b5add42b28..18e07837018ab33c0f74bdd3e022f8f205f1203f 100644 (file)
@@ -230,7 +230,8 @@ bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
 }
 
 bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
-  AtomicOrdering Order = CI->getSuccessOrdering();
+  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
+  AtomicOrdering FailureOrder = CI->getFailureOrdering();
   Value *Addr = CI->getPointerOperand();
   BasicBlock *BB = CI->getParent();
   Function *F = BB->getParent();
@@ -238,24 +239,27 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
   // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
   //
-  // The standard expansion we produce is:
+  // The full expansion we produce is:
   //     [...]
   //     fence?
   // cmpxchg.start:
   //     %loaded = @load.linked(%addr)
   //     %should_store = icmp eq %loaded, %desired
-  //     br i1 %should_store, label %cmpxchg.trystore, label %cmpxchg.end
+  //     br i1 %should_store, label %cmpxchg.trystore,
+  //                          label %cmpxchg.end/%cmpxchg.barrier
   // cmpxchg.trystore:
   //     %stored = @store_conditional(%new, %addr)
   //     %try_again = icmp i32 ne %stored, 0
   //     br i1 %try_again, label %loop, label %cmpxchg.end
-  // cmpxchg.end:
+  // cmpxchg.barrier:
   //     fence?
+  //     br label %cmpxchg.end
+  // cmpxchg.end:
   //     [...]
   BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
-  BasicBlock *TryStoreBB =
-      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
-  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+  auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
+  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, BarrierBB);
+  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
 
   // This grabs the DebugLoc from CI
   IRBuilder<> Builder(CI);
@@ -265,7 +269,7 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
+  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
   Builder.CreateBr(LoopBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
@@ -273,19 +277,24 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
   Value *ShouldStore =
       Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
-  Builder.CreateCondBr(ShouldStore, TryStoreBB, ExitBB);
+
+  // If the cmpxchg doesn't actually need any ordering when it fails, we can
+  // jump straight past that fence instruction (if it exists).
+  BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
+  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
 
   Builder.SetInsertPoint(TryStoreBB);
   Value *StoreSuccess =
       storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder);
   Value *TryAgain = Builder.CreateICmpNE(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
-  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+  Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
 
   // Finally, make sure later instructions don't get reordered with a fence if
   // necessary.
-  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  insertTrailingFence(Builder, Order);
+  Builder.SetInsertPoint(BarrierBB);
+  insertTrailingFence(Builder, SuccessOrder);
+  Builder.CreateBr(ExitBB);
 
   CI->replaceAllUsesWith(Loaded);
   CI->eraseFromParent();
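
To restate the control-flow decision from the hunk above in isolation: only a
Monotonic failure ordering lets the failed compare branch straight to
cmpxchg.end; any stronger failure ordering still has to pass through the fence
in cmpxchg.barrier. A standalone sketch (hypothetical helper, not part of the
patch):

    // Sketch, assuming LLVM's AtomicOrdering enum is in scope: does the
    // failed-CAS path still need to reach the trailing fence block?
    static bool failurePathNeedsBarrier(AtomicOrdering FailureOrder) {
      return FailureOrder != Monotonic;
    }
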
index 9a79c9fd7b1b51b8de36e112c31e5615a33ace11..ac8e949cf18cf2b004e164d9dbc5b4d21f20d9af 100644 (file)
@@ -194,3 +194,40 @@ entry:
   %0 = atomicrmw add i32* %p, i32 1 monotonic
   ret i32 %0
 }
+
+define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_cmpxchg_fail_order:
+
+  %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+; CHECK:     dmb ish
+; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
+; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
+; CHECK:     cmp     [[OLDVAL]], r1
+; CHECK:     bxne    lr
+; CHECK:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
+; CHECK:     cmp     [[SUCCESS]], #0
+; CHECK:     bne     [[LOOP_BB]]
+; CHECK:     dmb     ish
+; CHECK:     bx      lr
+
+  ret i32 %oldval
+}
+
+define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_cmpxchg_fail_order1:
+
+  %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
+; CHECK-NOT:     dmb ish
+; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
+; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
+; CHECK:     cmp     [[OLDVAL]], r1
+; CHECK:     bne     [[END_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
+; CHECK:     cmp     [[SUCCESS]], #0
+; CHECK:     bne     [[LOOP_BB]]
+; CHECK: [[END_BB]]:
+; CHECK:     dmb     ish
+; CHECK:     bx      lr
+
+  ret i32 %oldval
+}
index 1ca78bfd1e32ad835810a36eb53218a1bb27be37..00f9006c1555be232696ee8f38bba6d7cc42fad9 100644 (file)
@@ -1075,9 +1075,10 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw
    ret i16 %old
 }
 
-define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
+define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
    %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
+   store i32 %old, i32* @var32
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
@@ -1097,8 +1098,8 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD]]
-   ret i32 %old
+; CHECK: str{{(.w)?}} r[[OLD]],
+   ret void
 }
 
 define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {