From: Tim Northover
Date: Mon, 8 Apr 2013 08:40:41 +0000 (+0000)
Subject: AArch64: remove barriers from AArch64 atomic operations.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=211ffd242df8bacf4cbe034f5ca7545ab75b45df;p=oota-llvm.git

AArch64: remove barriers from AArch64 atomic operations.

I've managed to convince myself that AArch64's acquire/release
instructions are sufficient to guarantee C++11's required semantics,
even in the sequentially-consistent case.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179005 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 46b822152a0..468c56156a4 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -88,6 +88,8 @@ public:
   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
 
+  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
+
   SDNode *TrySelectToMoveImm(SDNode *N);
   SDNode *LowerToFPLitPool(SDNode *Node);
   SDNode *SelectToLitPool(SDNode *N);
@@ -318,6 +320,38 @@ AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
   return true;
 }
 
+SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+                                          unsigned Op16, unsigned Op32,
+                                          unsigned Op64) {
+  // Mostly direct translation to the given operations, except that we preserve
+  // the AtomicOrdering for use later on.
+  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+  EVT VT = AN->getMemoryVT();
+
+  unsigned Op;
+  if (VT == MVT::i8)
+    Op = Op8;
+  else if (VT == MVT::i16)
+    Op = Op16;
+  else if (VT == MVT::i32)
+    Op = Op32;
+  else if (VT == MVT::i64)
+    Op = Op64;
+  else
+    llvm_unreachable("Unexpected atomic operation");
+
+  SmallVector<SDValue, 4> Ops;
+  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
+    Ops.push_back(AN->getOperand(i));
+
+  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
+  Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+
+  return CurDAG->SelectNodeTo(Node, Op,
+                              AN->getValueType(0), MVT::Other,
+                              &Ops[0], Ops.size());
+}
+
 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   // Dump information about the Node being selected
   DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
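To make the operand shuffle in SelectAtomic concrete: a generic atomic node carries [chain, pointer, value operands...]; the selected pseudo-instruction keeps the value operands first, gains the preserved ordering as an immediate, and takes the chain last. A minimal standalone sketch of that reshuffle (plain C++ with illustrative strings, not LLVM code):

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      // Hypothetical ATOMIC_LOAD_ADD node: operand 0 is the chain, then the
      // pointer and the increment.
      std::vector<std::string> In = {"chain", "ptr", "incr"};
      unsigned Ordering = 7; // SequentiallyConsistent in LLVM's numbering

      std::vector<std::string> Out(In.begin() + 1, In.end()); // value operands
      Out.push_back("imm:" + std::to_string(Ordering)); // preserved ordering
      Out.push_back(In[0]);                             // chain moves to the end

      for (unsigned i = 0; i < Out.size(); ++i)
        std::printf("%s ", Out[i].c_str()); // prints: ptr incr imm:7 chain
      std::printf("\n");
      return 0;
    }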
@@ -328,6 +362,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   }
 
   switch (Node->getOpcode()) {
+  case ISD::ATOMIC_LOAD_ADD:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_ADD_I8,
+                        AArch64::ATOMIC_LOAD_ADD_I16,
+                        AArch64::ATOMIC_LOAD_ADD_I32,
+                        AArch64::ATOMIC_LOAD_ADD_I64);
+  case ISD::ATOMIC_LOAD_SUB:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_SUB_I8,
+                        AArch64::ATOMIC_LOAD_SUB_I16,
+                        AArch64::ATOMIC_LOAD_SUB_I32,
+                        AArch64::ATOMIC_LOAD_SUB_I64);
+  case ISD::ATOMIC_LOAD_AND:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_AND_I8,
+                        AArch64::ATOMIC_LOAD_AND_I16,
+                        AArch64::ATOMIC_LOAD_AND_I32,
+                        AArch64::ATOMIC_LOAD_AND_I64);
+  case ISD::ATOMIC_LOAD_OR:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_OR_I8,
+                        AArch64::ATOMIC_LOAD_OR_I16,
+                        AArch64::ATOMIC_LOAD_OR_I32,
+                        AArch64::ATOMIC_LOAD_OR_I64);
+  case ISD::ATOMIC_LOAD_XOR:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_XOR_I8,
+                        AArch64::ATOMIC_LOAD_XOR_I16,
+                        AArch64::ATOMIC_LOAD_XOR_I32,
+                        AArch64::ATOMIC_LOAD_XOR_I64);
+  case ISD::ATOMIC_LOAD_NAND:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_NAND_I8,
+                        AArch64::ATOMIC_LOAD_NAND_I16,
+                        AArch64::ATOMIC_LOAD_NAND_I32,
+                        AArch64::ATOMIC_LOAD_NAND_I64);
+  case ISD::ATOMIC_LOAD_MIN:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_MIN_I8,
+                        AArch64::ATOMIC_LOAD_MIN_I16,
+                        AArch64::ATOMIC_LOAD_MIN_I32,
+                        AArch64::ATOMIC_LOAD_MIN_I64);
+  case ISD::ATOMIC_LOAD_MAX:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_MAX_I8,
+                        AArch64::ATOMIC_LOAD_MAX_I16,
+                        AArch64::ATOMIC_LOAD_MAX_I32,
+                        AArch64::ATOMIC_LOAD_MAX_I64);
+  case ISD::ATOMIC_LOAD_UMIN:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_UMIN_I8,
+                        AArch64::ATOMIC_LOAD_UMIN_I16,
+                        AArch64::ATOMIC_LOAD_UMIN_I32,
+                        AArch64::ATOMIC_LOAD_UMIN_I64);
+  case ISD::ATOMIC_LOAD_UMAX:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_UMAX_I8,
+                        AArch64::ATOMIC_LOAD_UMAX_I16,
+                        AArch64::ATOMIC_LOAD_UMAX_I32,
+                        AArch64::ATOMIC_LOAD_UMAX_I64);
+  case ISD::ATOMIC_SWAP:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_SWAP_I8,
+                        AArch64::ATOMIC_SWAP_I16,
+                        AArch64::ATOMIC_SWAP_I32,
+                        AArch64::ATOMIC_SWAP_I64);
+  case ISD::ATOMIC_CMP_SWAP:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_CMP_SWAP_I8,
+                        AArch64::ATOMIC_CMP_SWAP_I16,
+                        AArch64::ATOMIC_CMP_SWAP_I32,
+                        AArch64::ATOMIC_CMP_SWAP_I64);
   case ISD::FrameIndex: {
     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
     EVT PtrTy = TLI.getPointerTy();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index e9f449709c4..6deae75488e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -59,12 +59,9 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
 
   computeRegisterProperties();
 
-  // Some atomic operations can be folded into load-acquire or store-release
-  // instructions on AArch64. It's marginally simpler to let LLVM expand
-  // everything out to a barrier and then recombine the (few) barriers we can.
-  setInsertFencesForAtomic(true);
-  setTargetDAGCombine(ISD::ATOMIC_FENCE);
-  setTargetDAGCombine(ISD::ATOMIC_STORE);
+  // We have particularly efficient implementations of atomic fences if they can
+  // be combined with nearby atomic loads and stores.
+  setShouldFoldAtomicFences(true);
 
   // We combine OR nodes for bitfield and NEON BSL operations.
   setTargetDAGCombine(ISD::OR);
@@ -275,27 +272,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
   return VT.changeVectorElementTypeToInteger();
 }
 
-static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
-                                  unsigned &strOpc) {
-  switch (Size) {
-  default: llvm_unreachable("unsupported size for atomic binary op!");
-  case 1:
-    ldrOpc = AArch64::LDXR_byte;
-    strOpc = AArch64::STXR_byte;
-    break;
-  case 2:
-    ldrOpc = AArch64::LDXR_hword;
-    strOpc = AArch64::STXR_hword;
-    break;
-  case 4:
-    ldrOpc = AArch64::LDXR_word;
-    strOpc = AArch64::STXR_word;
-    break;
-  case 8:
-    ldrOpc = AArch64::LDXR_dword;
-    strOpc = AArch64::STXR_dword;
-    break;
-  }
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+                                  unsigned &LdrOpc,
+                                  unsigned &StrOpc) {
+  static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
+                                 AArch64::LDXR_word, AArch64::LDXR_dword};
+  static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
+                                AArch64::LDAXR_word, AArch64::LDAXR_dword};
+  static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
+                                  AArch64::STXR_word, AArch64::STXR_dword};
+  static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword,
+                                 AArch64::STLXR_word, AArch64::STLXR_dword};
+
+  unsigned *LoadOps, *StoreOps;
+  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    LoadOps = LoadAcqs;
+  else
+    LoadOps = LoadBares;
+
+  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    StoreOps = StoreRels;
+  else
+    StoreOps = StoreBares;
+
+  assert(isPowerOf2_32(Size) && Size <= 8 &&
+         "unsupported size for atomic binary op!");
+
+  LdrOpc = LoadOps[Log2_32(Size)];
+  StrOpc = StoreOps[Log2_32(Size)];
 }
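The ordering-to-instruction mapping above is the heart of the patch: acquire-flavoured exclusive loads cover acquire, acq_rel and seq_cst; release-flavoured exclusive stores cover release, acq_rel and seq_cst; so a seq_cst operation becomes an LDAXR/STLXR pair with no DMB on either side. As a standalone analogue of getExclusiveOperation (a sketch returning mnemonics rather than AArch64 opcode enums, not the in-tree code):

    #include <cassert>
    #include <cstdio>

    enum AtomicOrdering { Monotonic, Acquire, Release, AcquireRelease,
                          SequentiallyConsistent };

    static unsigned log2u(unsigned Size) { // Size is 1, 2, 4 or 8
      unsigned L = 0;
      while (Size > 1) { Size >>= 1; ++L; }
      return L;
    }

    static void exclusiveMnemonics(unsigned Size, AtomicOrdering Ord,
                                   const char *&Ldr, const char *&Str) {
      // Word and doubleword share a mnemonic; only the register width differs.
      static const char *LoadBares[]  = {"ldxrb",  "ldxrh",  "ldxr",  "ldxr"};
      static const char *LoadAcqs[]   = {"ldaxrb", "ldaxrh", "ldaxr", "ldaxr"};
      static const char *StoreBares[] = {"stxrb",  "stxrh",  "stxr",  "stxr"};
      static const char *StoreRels[]  = {"stlxrb", "stlxrh", "stlxr", "stlxr"};

      bool NeedsAcquire = Ord == Acquire || Ord == AcquireRelease ||
                          Ord == SequentiallyConsistent;
      bool NeedsRelease = Ord == Release || Ord == AcquireRelease ||
                          Ord == SequentiallyConsistent;

      assert((Size & (Size - 1)) == 0 && Size <= 8 && "unsupported atomic size");
      Ldr = (NeedsAcquire ? LoadAcqs : LoadBares)[log2u(Size)];
      Str = (NeedsRelease ? StoreRels : StoreBares)[log2u(Size)];
    }

    int main() {
      const char *Ldr, *Str;
      exclusiveMnemonics(4, SequentiallyConsistent, Ldr, Str);
      std::printf("%s / %s\n", Ldr, Str); // prints: ldaxr / stlxr
      return 0;
    }

Keeping the four-orderings-by-four-sizes matrix in tables indexed by Log2_32(Size) avoids the sixteen-way switch the old code was heading towards.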
 
 MachineBasicBlock *
 AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
@@ -313,12 +317,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   unsigned dest = MI->getOperand(0).getReg();
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
   DebugLoc dl = MI->getDebugLoc();
 
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -397,6 +402,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   unsigned dest = MI->getOperand(0).getReg();
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+
   unsigned oldval = dest;
   DebugLoc dl = MI->getDebugLoc();
 
@@ -411,7 +418,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   }
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -479,6 +486,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned oldval = MI->getOperand(2).getReg();
   unsigned newval = MI->getOperand(3).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc dl = MI->getDebugLoc();
 
@@ -487,7 +495,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineFunction *MF = BB->getParent();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2377,78 +2385,6 @@ static SDValue PerformANDCombine(SDNode *N,
                      DAG.getConstant(LSB + Width - 1, MVT::i64));
 }
 
-static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
-                                          TargetLowering::DAGCombinerInfo &DCI) {
-  // An atomic operation followed by an acquiring atomic fence can be reduced to
-  // an acquiring load. The atomic operation provides a convenient pointer to
-  // load from. If the original operation was a load anyway we can actually
-  // combine the two operations into an acquiring load.
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue AtomicOp = FenceNode->getOperand(0);
-  AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
-
-  // A fence on its own can't be optimised
-  if (!AtomicNode)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
-
-  if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
-  // the chain we use should be its input, otherwise we'll put our store after
-  // it so we use its output chain.
-  SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
-    AtomicNode->getChain() : AtomicOp;
-
-  // We have an acquire fence with a handy atomic operation nearby, we can
-  // convert the fence into a load-acquire, discarding the result.
-  DebugLoc DL = FenceNode->getDebugLoc();
-  SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
-                             AtomicNode->getValueType(0),
-                             Chain,                  // Chain
-                             AtomicOp.getOperand(1), // Pointer
-                             AtomicNode->getMemOperand(), Acquire,
-                             FenceScope);
-
-  if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
-    DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
-
-  return Op.getValue(1);
-}
-
-static SDValue PerformATOMIC_STORECombine(SDNode *N,
-                                          TargetLowering::DAGCombinerInfo &DCI) {
-  // A releasing atomic fence followed by an atomic store can be combined into a
-  // single store operation.
-  SelectionDAG &DAG = DCI.DAG;
-  AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
-  SDValue FenceOp = AtomicNode->getOperand(0);
-
-  if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
-
-  if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  DebugLoc DL = AtomicNode->getDebugLoc();
-  return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
-                       FenceOp.getOperand(0),     // Chain
-                       AtomicNode->getOperand(1), // Pointer
-                       AtomicNode->getOperand(2), // Value
-                       AtomicNode->getMemOperand(), Release,
-                       FenceScope);
-}
-
 /// For a true bitfield insert, the bits getting into that contiguous mask
 /// should come from the low part of an existing value: they must be formed from
 /// a compatible SHL operation (unless they're already low). This function
@@ -2804,8 +2740,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default: break;
   case ISD::AND: return PerformANDCombine(N, DCI);
-  case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
-  case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
   case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
   case ISD::SRA: return PerformSRACombine(N, DCI);
   }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 37be5e4892e..ff21c223e9a 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -159,49 +159,55 @@ let Defs = [XSP], Uses = [XSP] in {
 //===----------------------------------------------------------------------===//
 // Atomic operation pseudo-instructions
 //===----------------------------------------------------------------------===//
 
-let usesCustomInserter = 1 in {
-multiclass AtomicSizes<string opname> {
-  def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-            [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>;
-  def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-            [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>;
-  def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-            [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>;
-  def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
-            [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>;
-}
-}
-
-defm ATOMIC_LOAD_ADD  : AtomicSizes<"atomic_load_add">;
-defm ATOMIC_LOAD_SUB  : AtomicSizes<"atomic_load_sub">;
-defm ATOMIC_LOAD_AND  : AtomicSizes<"atomic_load_and">;
-defm ATOMIC_LOAD_OR   : AtomicSizes<"atomic_load_or">;
-defm ATOMIC_LOAD_XOR  : AtomicSizes<"atomic_load_xor">;
-defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_SWAP      : AtomicSizes<"atomic_swap">;
+// These get selected from C++ code as a pretty much direct translation from the
+// generic DAG nodes. The one exception is that the AtomicOrdering is added as an
+// operand so that the eventual lowering can make use of it and choose
+// acquire/release operations when required.
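The acquiring_load and releasing_store PatFrags further down this file get the matching widening on the non-exclusive side: a plain LDAR now also matches a seq_cst atomic load, and a plain STLR a seq_cst atomic store. Sketched as standalone predicates (plain C++, not the TableGen that follows):

    #include <cstdio>

    enum AtomicOrdering { Monotonic, Acquire, Release, AcquireRelease,
                          SequentiallyConsistent };

    // LDAR covers both acquire and seq_cst atomic loads...
    static bool isAcquiringLoad(AtomicOrdering Ord) {
      return Ord == Acquire || Ord == SequentiallyConsistent;
    }

    // ...and STLR covers both release and seq_cst atomic stores.
    static bool isReleasingStore(AtomicOrdering Ord) {
      return Ord == Release || Ord == SequentiallyConsistent;
    }

    int main() {
      std::printf("%d %d\n", isAcquiringLoad(SequentiallyConsistent),
                  isReleasingStore(SequentiallyConsistent)); // prints: 1 1
      return 0;
    }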
+
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
+multiclass AtomicSizes {
+  def _I8 : PseudoInst<(outs GPR32:$dst),
+                       (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I16 : PseudoInst<(outs GPR32:$dst),
+                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I32 : PseudoInst<(outs GPR32:$dst),
+                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I64 : PseudoInst<(outs GPR64:$dst),
+                        (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
+}
+}
+
+defm ATOMIC_LOAD_ADD  : AtomicSizes;
+defm ATOMIC_LOAD_SUB  : AtomicSizes;
+defm ATOMIC_LOAD_AND  : AtomicSizes;
+defm ATOMIC_LOAD_OR   : AtomicSizes;
+defm ATOMIC_LOAD_XOR  : AtomicSizes;
+defm ATOMIC_LOAD_NAND : AtomicSizes;
+defm ATOMIC_SWAP      : AtomicSizes;
 let Defs = [NZCV] in {
   // These operations need a CMP to calculate the correct value
-  defm ATOMIC_LOAD_MIN  : AtomicSizes<"atomic_load_min">;
-  defm ATOMIC_LOAD_MAX  : AtomicSizes<"atomic_load_max">;
-  defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
-  defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
-}
-
-let usesCustomInserter = 1, Defs = [NZCV] in {
-def ATOMIC_CMP_SWAP_I8
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I16
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I32
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I64
-  : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
-               [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>;
+  defm ATOMIC_LOAD_MIN  : AtomicSizes;
+  defm ATOMIC_LOAD_MAX  : AtomicSizes;
+  defm ATOMIC_LOAD_UMIN : AtomicSizes;
+  defm ATOMIC_LOAD_UMAX : AtomicSizes;
 }
 
+class AtomicCmpSwap<RegisterClass GPRData>
+  : PseudoInst<(outs GPRData:$dst),
+               (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
+                    i32imm:$ordering), []> {
+  let usesCustomInserter = 1;
+  let hasCtrlDep = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let Defs = [NZCV];
+}
+
+def ATOMIC_CMP_SWAP_I8  : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
+
 //===----------------------------------------------------------------------===//
 // Add-subtract (extended register) instructions
 //===----------------------------------------------------------------------===//
@@ -2579,7 +2585,8 @@ defm LDAR  : A64I_LRex<"ldar",  0b101>;
 
 class acquiring_load<PatFrag base>
   : PatFrag<(ops node:$ptr), (base node:$ptr), [{
-  return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Acquire || Ordering == SequentiallyConsistent;
 }]>;
 
 def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
@@ -2610,7 +2617,8 @@ class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
 
 class releasing_store<PatFrag base>
   : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
-  return cast<AtomicSDNode>(N)->getOrdering() == Release;
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Release || Ordering == SequentiallyConsistent;
 }]>;
 
 def atomic_store_release_8 : releasing_store<atomic_store_8>;
diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
index 3c03e47147b..9888a742e32 100644
---
a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll +++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s define i32 @foo(i32* %var, i1 %cond) { ; CHECK: foo: @@ -9,7 +9,9 @@ simple_ver: store i32 %newval, i32* %var br label %somewhere atomic_ver: - %val = atomicrmw add i32* %var, i32 -1 seq_cst + fence seq_cst + %val = atomicrmw add i32* %var, i32 -1 monotonic + fence seq_cst br label %somewhere ; CHECK: dmb ; CHECK: ldxr diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index f3c16171cc8..5e87f21a217 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -8,18 +8,18 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_add_i8: %old = atomicrmw add i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -27,19 +27,19 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_add_i16: - %old = atomicrmw add i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw add i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -47,8 +47,8 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_add_i32: - %old = atomicrmw add i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw add i32* @var32, i32 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 @@ -57,9 +57,9 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -67,8 +67,8 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_add_i64: - %old = atomicrmw add i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw add i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 @@ -79,7 +79,7 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -87,8 +87,8 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_sub_i8: - %old = atomicrmw sub i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw sub i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 @@ -99,7 +99,7 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -107,8 +107,8 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_sub_i16: - %old = atomicrmw sub i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw sub i16* @var16, i16 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 @@ -117,9 +117,9 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -127,19 +127,19 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_sub_i32: - %old = atomicrmw sub i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw sub i32* @var32, i32 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -148,18 +148,18 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_sub_i64: %old = atomicrmw sub i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -167,8 +167,8 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_and_i8: - %old = atomicrmw and i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw and i8* @var8, i8 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 @@ -177,9 +177,9 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -187,8 +187,8 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_and_i16: - %old = atomicrmw and i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw and i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 @@ -199,7 +199,7 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -208,18 +208,18 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_and_i32: %old = atomicrmw and i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -227,19 +227,19 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_and_i64: - %old = atomicrmw and i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw and i64* @var64, i64 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -248,18 +248,18 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_or_i8: %old = atomicrmw or i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -267,8 +267,8 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_or_i16: - %old = atomicrmw or i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw or i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 @@ -279,7 +279,7 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -287,19 +287,19 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_or_i32: - %old = atomicrmw or i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw or i32* @var32, i32 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -307,8 +307,8 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_or_i64: - %old = atomicrmw or i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw or i64* @var64, i64 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 @@ -317,9 +317,9 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -327,19 +327,19 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_xor_i8: - %old = atomicrmw xor i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw xor i8* @var8, i8 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -347,8 +347,8 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_xor_i16: - %old = atomicrmw xor i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw xor i16* @var16, i16 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 @@ -357,9 +357,9 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -368,18 +368,18 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_xor_i32: %old = atomicrmw xor i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -387,8 +387,8 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_xor_i64: - %old = atomicrmw xor i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw xor i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 @@ -399,7 +399,7 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -407,8 +407,8 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_xchg_i8: - %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw xchg i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 @@ -418,7 +418,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -427,17 +427,17 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_xchg_i16: %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] +; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -445,8 +445,8 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_xchg_i32: - %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw xchg i32* @var32, i32 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 @@ -454,9 +454,9 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -464,18 +464,18 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_xchg_i64: - %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw xchg i64* @var64, i64 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -484,20 +484,20 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_min_i8: - %old = atomicrmw min i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw min i8* @var8, i8 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -505,8 +505,8 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_min_i16: - %old = atomicrmw min i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw min i16* @var16, i16 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 @@ -516,9 +516,9 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; function there. 
; CHECK-NEXT: cmp w0, w[[OLD]], sxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt -; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -526,8 +526,8 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_min_i32: - %old = atomicrmw min i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw min i32* @var32, i32 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 @@ -539,7 +539,7 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -548,19 +548,19 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_min_i64: %old = atomicrmw min i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -569,19 +569,19 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_max_i8: %old = atomicrmw max i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: cmp w0, w[[OLD]], sxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -589,20 +589,20 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_max_i16: - %old = atomicrmw max i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw max i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], sxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -610,8 +610,8 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_max_i32: - %old = atomicrmw max i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw max i32* @var32, i32 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 @@ -621,9 +621,9 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; function there. 
; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -631,8 +631,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_max_i64: - %old = atomicrmw max i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw max i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 @@ -644,7 +644,7 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -652,8 +652,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_umin_i8: - %old = atomicrmw umin i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw umin i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 @@ -665,7 +665,7 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -673,20 +673,20 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_umin_i16: - %old = atomicrmw umin i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw umin i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]], uxth ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -695,19 +695,19 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_umin_i32: %old = atomicrmw umin i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -715,20 +715,20 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_umin_i64: - %old = atomicrmw umin i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw umin i64* @var64, i64 %offset acq_rel +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -736,20 +736,20 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; CHECK: test_atomic_load_umax_i8: - %old = atomicrmw umax i8* @var8, i8 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw umax i8* @var8, i8 %offset acq_rel +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: cmp w0, w[[OLD]], uxtb ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo -; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -757,8 +757,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; CHECK: test_atomic_load_umax_i16: - %old = atomicrmw umax i16* @var16, i16 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw umax i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 @@ -770,7 +770,7 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -779,19 +779,19 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; CHECK: test_atomic_load_umax_i32: %old = atomicrmw umax i32* @var32, i32 %offset seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 ; CHECK: .LBB{{[0-9]+}}_1: -; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp w0, w[[OLD]] ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -799,8 +799,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK: test_atomic_load_umax_i64: - %old = atomicrmw umax i64* @var64, i64 %offset seq_cst -; CHECK: dmb ish + %old = atomicrmw umax i64* @var64, i64 %offset release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 @@ -810,9 +810,9 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; function there. ; CHECK-NEXT: cmp x0, x[[OLD]] ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo -; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -820,13 +820,13 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK: test_atomic_cmpxchg_i8: - %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst -; CHECK: dmb ish + %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: -; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
; CHECK-NEXT: cmp w[[OLD]], w0 @@ -834,7 +834,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; As above, w1 is a reasonable guess. ; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i8 %old @@ -843,20 +843,20 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK: test_atomic_cmpxchg_i16: %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: -; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. -; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i16 %old @@ -864,8 +864,8 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK: test_atomic_cmpxchg_i32: - %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst -; CHECK: dmb ish + %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 @@ -876,9 +876,9 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; As above, w1 is a reasonable guess. -; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i32 %old @@ -886,8 +886,8 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK: test_atomic_cmpxchg_i64: - %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst -; CHECK: dmb ish + %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 @@ -900,7 +900,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; As above, w1 is a reasonable guess. 
; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] -; CHECK: dmb ish +; CHECK-NOT: dmb ; CHECK: mov x0, x[[OLD]] ret i64 %old @@ -933,19 +933,26 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { define i8 @test_atomic_load_acquire_i8() nounwind { ; CHECK: test_atomic_load_acquire_i8: %val = load atomic i8* @var8 acquire, align 1 +; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK-NOT: dmb ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 - +; CHECK-NOT: dmb ; CHECK: ldarb w0, [x[[ADDR]]] +; CHECK-NOT: dmb ret i8 %val } define i8 @test_atomic_load_seq_cst_i8() nounwind { ; CHECK: test_atomic_load_seq_cst_i8: %val = load atomic i8* @var8 seq_cst, align 1 -; CHECK: adrp x[[HIADDR:[0-9]+]], var8 -; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8] -; CHECK: dmb ish +; CHECK-NOT: dmb +; CHECK: adrp [[HIADDR:x[0-9]+]], var8 +; CHECK-NOT: dmb +; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8 +; CHECK-NOT: dmb +; CHECK: ldarb w0, [x[[ADDR]]] +; CHECK-NOT: dmb ret i8 %val } @@ -954,6 +961,7 @@ define i16 @test_atomic_load_monotonic_i16() nounwind { %val = load atomic i16* @var16 monotonic, align 2 ; CHECK-NOT: dmb ; CHECK: adrp x[[HIADDR:[0-9]+]], var16 +; CHECK-NOT: dmb ; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16] ; CHECK-NOT: dmb @@ -976,9 +984,13 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind define i64 @test_atomic_load_seq_cst_i64() nounwind { ; CHECK: test_atomic_load_seq_cst_i64: %val = load atomic i64* @var64 seq_cst, align 8 -; CHECK: adrp x[[HIADDR:[0-9]+]], var64 -; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64] -; CHECK: dmb ish +; CHECK-NOT: dmb +; CHECK: adrp [[HIADDR:x[0-9]+]], var64 +; CHECK-NOT: dmb +; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64 +; CHECK-NOT: dmb +; CHECK: ldar x0, [x[[ADDR]]] +; CHECK-NOT: dmb ret i64 %val } @@ -1005,20 +1017,26 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) define void @test_atomic_store_release_i8(i8 %val) nounwind { ; CHECK: test_atomic_store_release_i8: store atomic i8 %val, i8* @var8 release, align 1 +; CHECK-NOT: dmb ; CHECK: adrp [[HIADDR:x[0-9]+]], var8 +; CHECK-NOT: dmb ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8 +; CHECK-NOT: dmb ; CHECK: stlrb w0, [x[[ADDR]]] - +; CHECK-NOT: dmb ret void } define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind { ; CHECK: test_atomic_store_seq_cst_i8: store atomic i8 %val, i8* @var8 seq_cst, align 1 +; CHECK-NOT: dmb ; CHECK: adrp [[HIADDR:x[0-9]+]], var8 +; CHECK-NOT: dmb ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8 +; CHECK-NOT: dmb ; CHECK: stlrb w0, [x[[ADDR]]] -; CHECK: dmb ish +; CHECK-NOT: dmb ret void } @@ -1026,9 +1044,11 @@ define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind { define void @test_atomic_store_monotonic_i16(i16 %val) nounwind { ; CHECK: test_atomic_store_monotonic_i16: store atomic i16 %val, i16* @var16 monotonic, align 2 +; CHECK-NOT: dmb ; CHECK: adrp x[[HIADDR:[0-9]+]], var16 +; CHECK-NOT: dmb ; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16] - +; CHECK-NOT: dmb ret void } @@ -1039,7 +1059,9 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va %addr = inttoptr i64 %addr_int to i32* store atomic i32 %val, i32* %addr monotonic, align 4 +; CHECK-NOT: dmb ; CHECK: str w2, [x0, x1] +; CHECK-NOT: dmb ret void } @@ -1047,9 +1069,12 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va define void 
@test_atomic_store_release_i64(i64 %val) nounwind { ; CHECK: test_atomic_store_release_i64: store atomic i64 %val, i64* @var64 release, align 8 +; CHECK-NOT: dmb ; CHECK: adrp [[HIADDR:x[0-9]+]], var64 +; CHECK-NOT: dmb ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64 +; CHECK-NOT: dmb ; CHECK: stlr x0, [x[[ADDR]]] - +; CHECK-NOT: dmb ret void }
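If the reasoning in the commit message holds, none of this needs source changes: a portable C++11 program picks up the barrier-free sequences automatically. For instance, the seq_cst read-modify-write below should now compile on AArch64 to a bare ldaxr/add/stlxr/cbnz loop with no dmb on either side (standard C++11; the variable name is illustrative):

    #include <atomic>
    #include <cstdio>

    std::atomic<int> var(0);

    int main() {
      // Sequentially-consistent RMW, equivalent to the IR tested above:
      //   atomicrmw add i32* @var, i32 1 seq_cst
      int old = var.fetch_add(1, std::memory_order_seq_cst);
      std::printf("%d\n", old);
      return 0;
    }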