bool expandAtomicStore(StoreInst *LI);
bool expandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
-
- AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
- void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
};
}
}
bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
- // Load instructions don't actually need a leading fence, even in the
- // SequentiallyConsistent case.
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+ // If getInsertFencesForAtomic() returns true, then the target does not want
+ // to deal with memory orders, and emitLeading/TrailingFence should take care
+ // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
+ // should preserve the ordering.
AtomicOrdering MemOpOrder =
- TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()
- ? Monotonic
- : LI->getOrdering();
+ TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+ IRBuilder<> Builder(LI);
- // The only 64-bit load guaranteed to be single-copy atomic by the ARM is
+ // Note that although no fence is required before atomic load on ARM, it is
+ // required before SequentiallyConsistent loads for the recommended Power
+ // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html).
+ // So we let the target choose what to emit.
+ TLI->emitLeadingFence(Builder, LI->getOrdering(),
+ /*IsStore=*/false, /*IsLoad=*/true);
+
+ // The only 64-bit load guaranteed to be single-copy atomic by ARM is
// an ldrexd (A3.5.3).
- IRBuilder<> Builder(LI);
- Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
- Builder, LI->getPointerOperand(), MemOpOrder);
+ Value *Val =
+ TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
- insertTrailingFence(Builder, LI->getOrdering());
+ TLI->emitTrailingFence(Builder, LI->getOrdering(),
+ /*IsStore=*/false, /*IsLoad=*/true);
LI->replaceAllUsesWith(Val);
LI->eraseFromParent();
}
bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering Order = AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
+ // If getInsertFencesForAtomic() return true, then the target does not want to
+ // deal with memory orders, and emitLeading/TrailingFence should take care of
+ // everything. Otherwise, emitLeading/TrailingFence are no-op and we should
+ // preserve the ordering.
+ AtomicOrdering MemOpOrder =
+ TLI->getInsertFencesForAtomic() ? Monotonic : Order;
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
//
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
+ TLI->emitLeadingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
- Builder, Addr, MemOpOrder);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
Value *NewVal;
switch (AI->getOperation()) {
}
Value *StoreSuccess =
- TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
- Builder, NewVal, Addr, MemOpOrder);
+ TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
Value *TryAgain = Builder.CreateICmpNE(
StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- insertTrailingFence(Builder, Order);
+ TLI->emitTrailingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
}
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
Value *Addr = CI->getPointerOperand();
BasicBlock *BB = CI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
+ // If getInsertFencesForAtomic() return true, then the target does not want to
+ // deal with memory orders, and emitLeading/TrailingFence should take care of
+ // everything. Otherwise, emitLeading/TrailingFence are no-op and we should
+ // preserve the ordering.
+ AtomicOrdering MemOpOrder =
+ TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
//
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
- Builder, Addr, MemOpOrder);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
Value *ShouldStore =
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess =
- TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
- Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ Value *StoreSuccess = TLI->emitStoreConditional(
+ Builder, CI->getNewValOperand(), Addr, MemOpOrder);
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
Builder.CreateCondBr(StoreSuccess, SuccessBB,
// Make sure later instructions don't get reordered with a fence if necessary.
Builder.SetInsertPoint(SuccessBB);
- insertTrailingFence(Builder, SuccessOrder);
+ TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
Builder.CreateBr(ExitBB);
Builder.SetInsertPoint(FailureBB);
- insertTrailingFence(Builder, FailureOrder);
+ TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
Builder.CreateBr(ExitBB);
// Finally, we have control-flow based knowledge of whether the cmpxchg
CI->eraseFromParent();
return true;
}
-
-AtomicOrdering AtomicExpand::insertLeadingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord) {
- if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
- return Ord;
-
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- Builder.CreateFence(Release);
-
- // The exclusive operations don't need any barrier if we're adding separate
- // fences.
- return Monotonic;
-}
-
-void AtomicExpand::insertTrailingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord) {
- if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
- return;
-
- if (Ord == Acquire || Ord == AcquireRelease)
- Builder.CreateFence(Acquire);
- else if (Ord == SequentiallyConsistent)
- Builder.CreateFence(SequentiallyConsistent);
-}
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
- unsigned Domain = ARM_MB::ISH;
+ ARM_MB::MemBOpt Domain = ARM_MB::ISH;
if (Subtarget->isMClass()) {
// Only a full system barrier exists in the M-class architectures.
Domain = ARM_MB::SY;
return true;
}
+static void makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+ Constant *CDomain = Builder.getInt32(Domain);
+ Builder.CreateCall(DMB, CDomain);
+}
+
+// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return;
+
+ switch (Ord) {
+ case NotAtomic:
+ case Unordered:
+ llvm_unreachable("Invalid fence: unordered/non-atomic");
+ case Monotonic:
+ case Acquire:
+ return; // Nothing to do
+ case SequentiallyConsistent:
+ if (!IsStore)
+ return; // Nothing to do
+ /*FALLTHROUGH*/
+ case Release:
+ case AcquireRelease:
+ if (Subtarget->isSwift())
+ makeDMB(Builder, ARM_MB::ISHST);
+ // FIXME: add a comment with a link to documentation justifying this.
+ else
+ makeDMB(Builder, ARM_MB::ISH);
+ return;
+ }
+}
+
+void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return;
+
+ switch (Ord) {
+ case NotAtomic:
+ case Unordered:
+ llvm_unreachable("Invalid fence: unordered/not-atomic");
+ case Monotonic:
+ case Release:
+ return; // Nothing to do
+ case Acquire:
+ case AcquireRelease:
+ case SequentiallyConsistent:
+ makeDMB(Builder, ARM_MB::ISH);
+ return;
+ }
+}
+
bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
// Loads and stores less than 64-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
+ void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
bool shouldExpandAtomicInIR(Instruction *Inst) const override;
bool useLoadStackGuardNode() const override;
define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
; CHECK-LABEL: @test_atomic_xchg_i8
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
ret i8 %res
define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
; CHECK-LABEL: @test_atomic_add_i16
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i16 [[OLDVAL]]
%res = atomicrmw add i16* %ptr, i16 %addend seq_cst
ret i16 %res
define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
; CHECK-LABEL: @test_atomic_sub_i32
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i32 [[OLDVAL]]
%res = atomicrmw sub i32* %ptr, i32 %subend acquire
ret i32 %res
define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
; CHECK-LABEL: @test_atomic_and_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw and i8* %ptr, i8 %andend release
ret i8 %res
define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
; CHECK-LABEL: @test_atomic_nand_i16
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i16 [[OLDVAL]]
%res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
ret i16 %res
define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
; CHECK-LABEL: @test_atomic_or_i64
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i64 [[OLDVAL]]
%res = atomicrmw or i64* %ptr, i64 %orend seq_cst
ret i64 %res
define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
; CHECK-LABEL: @test_atomic_xor_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
ret i8 %res
define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
; CHECK-LABEL: @test_atomic_max_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
ret i8 %res
define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
; CHECK-LABEL: @test_atomic_min_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw min i8* %ptr, i8 %minend seq_cst
ret i8 %res
define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
; CHECK-LABEL: @test_atomic_umax_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
ret i8 %res
define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
; CHECK-LABEL: @test_atomic_umin_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: ret i8 [[OLDVAL]]
%res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
ret i8 %res
define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[FAILURE_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[FAILURE_BB]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[DONE:.*]]
; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
; CHECK: [[DONE]]:
define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
; CHECK-LABEL: @test_cmpxchg_seq_cst
-; CHECK: fence release
+; Intrinsic for "dmb ishst" is then expected
+; CHECK: call void @llvm.arm.dmb(i32 10)
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END:.*]]
; CHECK: [[FAILURE_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END]]
; CHECK: [[END]]:
define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
; CHECK-LABEL: @test_cmpxchg_weak_fail
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 10)
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END:.*]]
; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[END]]
; CHECK: [[END]]:
define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
; CHECK-LABEL: @test_cmpxchg_monotonic
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[END:.*]]
; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
; CHECK: br label %[[END]]
; CHECK: [[END]]: