From 989f61e6c03b4717838b73aeaac7a38e2d8bb06a Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Tue, 2 Aug 2011 22:44:16 +0000
Subject: [PATCH] ARM backend support for atomicrmw and cmpxchg with
 non-monotonic ordering. Not especially pretty, but seems to work well
 enough. If this looks okay, I'll put together similar patches for Mips,
 PPC, and Alpha.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136737 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 124 ++++++++++++++++++-----
 1 file changed, 77 insertions(+), 47 deletions(-)

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 46b42996834..ed5e3ab6ca8 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -602,58 +602,37 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     // normally.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
   } else {
     // Set them all for expansion, which will force libcalls.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
-    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
     // Since the libcalls include locking, fold in the fences
     setShouldFoldAtomicFences(true);
   }
-  // 64-bit versions are always libcalls (for now)
-  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
 
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
 
@@ -2279,33 +2258,72 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
                      DAG.getConstant(DMBOpt, MVT::i32));
 }
 
-
-static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
-                                 const ARMSubtarget *Subtarget) {
-  // FIXME: handle "fence singlethread" more efficiently.
-  DebugLoc dl = Op.getDebugLoc();
+static SDValue getFence(SDValue InChain, DebugLoc dl, SelectionDAG &DAG,
+                        const ARMSubtarget *Subtarget) {
   if (!Subtarget->hasDataBarrier()) {
     // Some ARMv6 cpus can support data barriers with an mcr instruction.
     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
     // here.
     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
-    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, InChain,
                        DAG.getConstant(0, MVT::i32));
   }
 
-  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
-    cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, InChain,
+                     DAG.getConstant(ARM_MB::ISH, MVT::i32));
+}
 
-  ARM_MB::MemBOpt DMBOpt;
-  if (FenceOrdering == Release)
-    DMBOpt = ARM_MB::ISHST;
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+                                 const ARMSubtarget *Subtarget) {
+  // FIXME: handle "fence singlethread" more efficiently.
+  DebugLoc dl = Op.getDebugLoc();
+  return getFence(Op.getOperand(0), dl, DAG, Subtarget);
+}
+
+static SDValue LowerAtomicMemOp(SDValue Op, SelectionDAG &DAG,
+                                const ARMSubtarget *Subtarget) {
+  DebugLoc dl = Op.getDebugLoc();
+  int Order = cast<AtomicSDNode>(Op)->getOrdering();
+  if (Order <= Monotonic)
+    return Op;
+
+  SDValue InChain = Op.getOperand(0);
+
+  // Fence, if necessary
+  if (Order == Release || Order >= AcquireRelease)
+    InChain = getFence(InChain, dl, DAG, Subtarget);
+
+  // Rather than mess with target-specific nodes, use the target-independent
+  // node, and assume the DAGCombiner will not touch it post-legalize.
+  SDValue OutVal;
+  if (Op.getOpcode() == ISD::ATOMIC_CMP_SWAP)
+    OutVal = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
+                           cast<AtomicSDNode>(Op)->getMemoryVT(),
+                           InChain, Op.getOperand(1), Op.getOperand(2),
+                           Op.getOperand(3),
+                           cast<AtomicSDNode>(Op)->getMemOperand(),
+                           Monotonic,
+                           cast<AtomicSDNode>(Op)->getSynchScope());
   else
-    DMBOpt = ARM_MB::ISH;
-  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
-                     DAG.getConstant(DMBOpt, MVT::i32));
+    OutVal = DAG.getAtomic(Op.getOpcode(), dl,
+                           cast<AtomicSDNode>(Op)->getMemoryVT(),
+                           InChain, Op.getOperand(1), Op.getOperand(2),
+                           cast<AtomicSDNode>(Op)->getMemOperand(),
+                           Monotonic,
+                           cast<AtomicSDNode>(Op)->getSynchScope());
+
+  SDValue OutChain = OutVal.getValue(1);
+
+  // Fence, if necessary
+  if (Order == Acquire || Order >= AcquireRelease)
+    OutChain = getFence(OutChain, dl, DAG, Subtarget);
+
+  SDValue Ops[2] = { OutVal, OutChain };
+  return DAG.getMergeValues(Ops, 2, dl);
 }
 
+
 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                              const ARMSubtarget *Subtarget) {
   // ARM pre v5TE and Thumb1 does not have preload instructions.
@@ -4864,6 +4882,18 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::VASTART: return LowerVASTART(Op, DAG);
   case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
   case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
+  case ISD::ATOMIC_CMP_SWAP:
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX: return LowerAtomicMemOp(Op, DAG, Subtarget);
   case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
-- 
2.34.1
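Note on the approach used above: LowerAtomicMemOp keeps the atomic operation itself monotonic and brackets it with dmb barriers derived from the requested ordering. A minimal sketch of that ordering-to-fence mapping is shown below, assuming LLVM's AtomicOrdering enum values (Monotonic < Acquire < Release < AcquireRelease < SequentiallyConsistent); the helper names are illustrative only and are not part of the patch.

    // Sketch only: restates the two "Fence, if necessary" checks in
    // LowerAtomicMemOp above; these helpers are hypothetical, not an API
    // introduced by the patch.
    static bool needsFenceBefore(int Order) {
      // release, acq_rel and seq_cst need a barrier before the operation
      return Order == Release || Order >= AcquireRelease;
    }
    static bool needsFenceAfter(int Order) {
      // acquire, acq_rel and seq_cst need a barrier after the operation
      return Order == Acquire || Order >= AcquireRelease;
    }

In effect, an acquire atomicrmw gets only a trailing barrier, a release one gets only a leading barrier, and acq_rel/seq_cst operations get both, while monotonic operations pass through unchanged.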