From 57b1a9599b8b3a3c571103ed481932c2768d8da9 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Wed, 2 Dec 2015 18:12:57 +0000
Subject: [PATCH] AArch64: use ldxp/stxp pair to implement 128-bit atomic loads.

The ARM ARM is clear that 128-bit loads are only guaranteed to have been
atomic if there has been a corresponding successful stxp. It's less clear
for AArch32, so I'm leaving that alone for now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254524 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetLowering.h       | 10 ++--
 lib/CodeGen/AtomicExpandPass.cpp           | 68 ++++++++++++----------
 lib/Target/ARM/ARMISelLowering.cpp         |  2 +-
 lib/Target/Hexagon/HexagonISelLowering.cpp |  2 +-
 test/CodeGen/AArch64/arm64-atomic-128.ll   |  7 ++-
 test/CodeGen/ARM/atomic-64bit.ll           |  6 ++
 6 files changed, 57 insertions(+), 38 deletions(-)

diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index cb5a5796e98..819458dbb0f 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -130,10 +130,12 @@ public:
   /// support for these atomic instructions, and also have different options
   /// w.r.t. what they should expand to.
   enum class AtomicExpansionKind {
-    None,    // Don't expand the instruction.
-    LLSC,    // Expand the instruction into loadlinked/storeconditional; used
-             // by ARM/AArch64.
-    CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
+    None,    // Don't expand the instruction.
+    LLSC,    // Expand the instruction into loadlinked/storeconditional; used
+             // by ARM/AArch64.
+    LLOnly,  // Expand the (load) instruction into just a load-linked, which has
+             // greater atomic guarantees than a normal load.
+    CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
   };
 
   static ISD::NodeType getExtendForContent(BooleanContent Content) {
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index e7998db4a7c..e4b7c5a6278 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -51,7 +51,9 @@ namespace {
     bool expandAtomicLoadToCmpXchg(LoadInst *LI);
     bool expandAtomicStore(StoreInst *SI);
     bool tryExpandAtomicRMW(AtomicRMWInst *AI);
-    bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
+    bool expandAtomicOpToLLSC(
+        Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+        std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool isIdempotentRMW(AtomicRMWInst *AI);
     bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@@ -174,13 +176,15 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
   switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
-  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+  case TargetLoweringBase::AtomicExpansionKind::LLSC:
+    return expandAtomicOpToLLSC(
+        LI, LI->getPointerOperand(), LI->getOrdering(),
+        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
     return expandAtomicLoadToLL(LI);
-  }
-  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
     return expandAtomicLoadToCmpXchg(LI);
   }
-  }
   llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
 }
 
@@ -192,6 +196,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
   // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
   Value *Val =
       TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
 
   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -245,20 +250,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
   NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
 }
 
-bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
-  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
-  case TargetLoweringBase::AtomicExpansionKind::None:
-    return false;
-  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
-    return expandAtomicRMWToLLSC(AI);
-  }
-  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
-    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
-  }
-  }
-  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
-}
-
 /// Emit IR to implement the given atomicrmw operation on values in registers,
 /// returning the new value.
 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
@@ -296,10 +287,28 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
   }
 }
 
-bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
-  AtomicOrdering MemOpOrder = AI->getOrdering();
-  Value *Addr = AI->getPointerOperand();
-  BasicBlock *BB = AI->getParent();
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC:
+    return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
+                                [&](IRBuilder<> &Builder, Value *Loaded) {
+                                  return performAtomicOp(AI->getOperation(),
+                                                         Builder, Loaded,
+                                                         AI->getValOperand());
+                                });
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
+    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+  default:
+    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+  }
+}
+
+bool AtomicExpand::expandAtomicOpToLLSC(
+    Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+    std::function<Value *(IRBuilder<> &, Value *)> PerformOp) {
+  BasicBlock *BB = I->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
 
@@ -317,11 +326,11 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   // atomicrmw.end:
   //     fence?
   //     [...]
-  BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
+  BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end");
   BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
 
-  // This grabs the DebugLoc from AI.
-  IRBuilder<> Builder(AI);
+  // This grabs the DebugLoc from I.
+  IRBuilder<> Builder(I);
 
   // The split call above "helpfully" added a branch at the end of BB (to the
   // wrong place), but we might want a fence too. It's easiest to just remove
@@ -334,8 +343,7 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   Builder.SetInsertPoint(LoopBB);
   Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
 
-  Value *NewVal =
-      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+  Value *NewVal = PerformOp(Builder, Loaded);
 
   Value *StoreSuccess =
       TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
@@ -345,8 +353,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
 
-  AI->replaceAllUsesWith(Loaded);
-  AI->eraseFromParent();
+  I->replaceAllUsesWith(Loaded);
+  I->eraseFromParent();
 
   return true;
 }
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e8f3ab65bdb..33f74a3ba9f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -11891,7 +11891,7 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLSC
+  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
                                                   : AtomicExpansionKind::None;
 }
 
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 969edf6d557..04f5b664929 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2863,7 +2863,7 @@ TargetLowering::AtomicExpansionKind
 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   // Do not expand loads and stores that don't exceed 64 bits.
   return LI->getType()->getPrimitiveSizeInBits() > 64
-             ? AtomicExpansionKind::LLSC
+             ? AtomicExpansionKind::LLOnly
              : AtomicExpansionKind::None;
 }
 
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index a76cf74a6d0..44c24c51f0d 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -173,10 +173,13 @@ define i128 @atomic_load_seq_cst(i128* %p) {
    ret i128 %r
 }
 
-define i128 @atomic_load_relaxed(i128* %p) {
+define i128 @atomic_load_relaxed(i64, i64, i128* %p) {
 ; CHECK-LABEL: atomic_load_relaxed:
 ; CHECK-NOT: dmb
-; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x2]
+; CHECK-NEXT: stxp [[SUCCESS:w[0-9]+]], [[LO]], [[HI]], [x2]
+; CHECK: cbnz [[SUCCESS]], [[LABEL]]
 ; CHECK-NOT: dmb
    %r = load atomic i128, i128* %p monotonic, align 16
    ret i128 %r
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 7510d6ccdc3..573cd45c082 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -208,10 +208,16 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 define i64 @test8(i64* %ptr) {
 ; CHECK-LABEL: test8:
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK-NOT: strexd
+; CHECK: clrex
+; CHECK-NOT: strexd
 ; CHECK: dmb {{ish$}}
 
 ; CHECK-THUMB-LABEL: test8:
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB-NOT: strexd
+; CHECK-THUMB: clrex
+; CHECK-THUMB-NOT: strexd
 ; CHECK-THUMB: dmb {{ish$}}
 
   %r = load atomic i64, i64* %ptr seq_cst, align 8
-- 
2.34.1
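
Not part of the patch itself, but as a rough illustration of the distinction the new enumerator introduces: a minimal C++ sketch of how a target's shouldExpandAtomicLoadInIR hook might choose between the expansion kinds after this change. The class name HypotheticalTargetLowering and the size thresholds are invented for the example; AtomicExpansionKind::LLSC now asks AtomicExpandPass for a full load-linked/store-conditional loop (what AArch64 needs for 128-bit loads), while AtomicExpansionKind::LLOnly keeps the previous behaviour of emitting just a load-linked instruction (what ARM and Hexagon switch to in the hunks above).

// Sketch only: mirrors the hook shape seen in ARMISelLowering.cpp above, with
// an invented target name. Thresholds are illustrative, not prescriptive.
TargetLowering::AtomicExpansionKind
HypotheticalTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
  if (Size == 128)
    // A bare exclusive-load pair is not guaranteed single-copy atomic, so
    // request the full load-linked/store-conditional retry loop.
    return AtomicExpansionKind::LLSC;
  if (Size == 64)
    // A single load-linked already provides the required atomicity, so keep
    // the cheaper expansion that emits only the load-linked instruction.
    return AtomicExpansionKind::LLOnly;
  // Smaller ordinary loads are already atomic; no expansion needed.
  return AtomicExpansionKind::None;
}

With this split, a target only pays for the store-conditional and retry branch when its architecture actually requires a successful store-exclusive to make the load atomic.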