X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=eb444d57cffae2810c48da73e059dd253e5abc46;hp=fa38332ea4927630a77bf1fddebc14a1e9f8a5f3;hb=cfe761c9e6cf97dc804725df08247b9402085f84;hpb=cc921d6f41d6aaeaba31d3a534eaecf77a2475bd

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index fa38332ea49..eb444d57cff 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -163,7 +164,7 @@ static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
   return new ARMElfTargetObjectFile();
 }
 
-ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
     : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
   RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
@@ -408,8 +409,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
       !Subtarget->isThumb1Only()) {
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
-    if (!Subtarget->isFPOnlySP())
-      addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+    addRegisterClass(MVT::f64, &ARM::DPRRegClass);
   }
 
   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -591,6 +591,39 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (!Subtarget->isThumb1Only())
     setTargetDAGCombine(ISD::ADDC);
 
+  if (Subtarget->isFPOnlySP()) {
+    // When targeting a floating-point unit with only single-precision
+    // operations, f64 is legal for the few double-precision instructions which
+    // are present. However, no double-precision operations other than moves,
+    // loads and stores are provided by the hardware.
+ setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FDIV, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + } computeRegisterProperties(); @@ -734,8 +767,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion. - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // the default expansion. If we are targeting a single threaded system, + // then set them all for expand so we can lower them later into their + // non-atomic form. + if (TM.Options.ThreadModel == ThreadModel::Single) + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); @@ -743,7 +780,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { - // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); } } else { @@ -844,8 +881,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // v8 adds f64 <-> f16 conversion. Before that it should be expanded. - if (!Subtarget->hasV8Ops()) { + // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. 
+ if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); } @@ -861,7 +898,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + if (Subtarget->getTargetTriple().isiOS()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -869,16 +906,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } - // ARMv8 implements a lot of rounding-like FP operations. - if (Subtarget->hasV8Ops()) { - static MVT RoundingTypes[] = {MVT::f32, MVT::f64}; - for (const auto Ty : RoundingTypes) { - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); + // FP-ARMv8 implements a lot of rounding-like FP operations. + if (Subtarget->hasFPARMv8()) { + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FROUND, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + if (!Subtarget->isFPOnlySP()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); } } // We have target-specific dag combine patterns for the following nodes: @@ -1548,7 +1590,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // True if this byval aggregate will be split between registers // and memory. unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); - unsigned CurByValIdx = CCInfo.getInRegsParamsProceed(); + unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); if (CurByValIdx < ByValArgsCount) { @@ -1982,6 +2024,19 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Subtarget->isThumb1Only()) return false; + // Externally-defined functions with weak linkage should not be + // tail-called on ARM when the OS does not support dynamic + // pre-emption of symbols, as the AAELF spec requires normal calls + // to undefined weak functions to be replaced with a NOP or jump to the + // next instruction. The behaviour of branch instructions in this + // situation (as used for tail calls) is implementation-defined, so we + // cannot rely on the linker replacing the tail call with a return. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + const GlobalValue *GV = G->getGlobal(); + if (GV->hasExternalWeakLinkage()) + return false; + } + // If the calling conventions do not match, then we'd better make sure the // results are returned in the same way as what the caller expects. 
if (!CCMatch) { @@ -2263,9 +2318,15 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (Copies.count(UseChain.getNode())) // Second CopyToReg Copy = *UI; - else + else { + // We are at the top of this chain. + // If the copy has a glue operand, we conservatively assume it + // isn't safe to perform a tail call. + if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue) + return false; // First CopyToReg TCChain = UseChain; + } } } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. @@ -2274,6 +2335,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. + if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; TCChain = Copy->getOperand(0); } else { return false; @@ -2614,9 +2679,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::arm_rbit: { - assert(Op.getOperand(0).getValueType() == MVT::i32 && + assert(Op.getOperand(1).getValueType() == MVT::i32 && "RBIT intrinsic must have i32 type!"); - return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0)); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -2673,7 +2738,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, ConstantSDNode *OrdN = cast(Op.getOperand(1)); AtomicOrdering Ord = static_cast(OrdN->getZExtValue()); - unsigned Domain = ARM_MB::ISH; + ARM_MB::MemBOpt Domain = ARM_MB::ISH; if (Subtarget->isMClass()) { // Only a full system barrier exists in the M-class architectures. Domain = ARM_MB::SY; @@ -3001,7 +3066,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (Flags.isByVal()) { unsigned ExtraArgRegsSize; unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(), + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(), Flags.getByValSize(), ExtraArgRegsSize, ExtraArgRegsSaveSize); @@ -3016,7 +3081,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } CCInfo.rewindByValRegsInfo(); lastInsIndex = -1; - if (isVarArg) { + if (isVarArg && MFI->hasVAStart()) { unsigned ExtraArgRegsSize; unsigned ExtraArgRegsSaveSize; computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, @@ -3125,7 +3190,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. 
if (Flags.isByVal()) { - unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); int FrameIndex = StoreByValRegs( @@ -3158,7 +3223,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } // varargs - if (isVarArg) + if (isVarArg && MFI->hasVAStart()) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); @@ -3180,6 +3245,18 @@ static bool isFloatingPointZero(SDValue Op) { if (const ConstantFP *CFP = dyn_cast(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } + } else if (Op->getOpcode() == ISD::BITCAST && + Op->getValueType(0) == MVT::f64) { + // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) + // created by LowerConstantFP(). + SDValue BitcastOp = Op->getOperand(0); + if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) { + SDValue MoveOp = BitcastOp->getOperand(0); + if (MoveOp->getOpcode() == ISD::TargetConstant && + cast(MoveOp)->getZExtValue() == 0) { + return true; + } + } } return false; } @@ -3248,6 +3325,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDLoc dl) const { + assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); @@ -3363,9 +3441,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); - return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, - ARMcc, CCR, OverflowCmp); - + return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR, + OverflowCmp, DAG); } // Convert: @@ -3399,7 +3476,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = Cond.getOperand(3); SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); assert(True.getValueType() == VT); - return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); + return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); } } } @@ -3469,6 +3546,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, } } +SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, + SDValue TrueVal, SDValue ARMcc, SDValue CCR, + SDValue Cmp, SelectionDAG &DAG) const { + if (Subtarget->isFPOnlySP() && VT == MVT::f64) { + FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), FalseVal); + TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), TrueVal); + + SDValue TrueLow = TrueVal.getValue(0); + SDValue TrueHigh = TrueVal.getValue(1); + SDValue FalseLow = FalseVal.getValue(0); + SDValue FalseHigh = FalseVal.getValue(1); + + SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, + ARMcc, CCR, Cmp); + SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, + ARMcc, CCR, duplicateCmp(Cmp, DAG)); + + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); + } else { + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, + Cmp); + } +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3478,6 +3581,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 
SDValue FalseVal = Op.getOperand(3); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. + if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { // Try to generate VSEL on ARMv8. // The VSEL instruction can't use all the usual ARM condition @@ -3502,8 +3617,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, - Cmp); + return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; @@ -3518,12 +3632,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // select c, a, b // We only do this in unsafe-fp-math, because signed zeros and NaNs are // handled differently than the original code sequence. - if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && - RHS == FalseVal) { - if (CC == ISD::SETOGT || CC == ISD::SETUGT) - return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOLT || CC == ISD::SETULT) - return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + if (getTargetMachine().Options.UnsafeFPMath) { + if (LHS == TrueVal && RHS == FalseVal) { + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } else if (LHS == FalseVal && RHS == TrueVal) { + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } } bool swpCmpOps = false; @@ -3542,14 +3662,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMcc, CCR, Cmp); + SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); - Result = DAG.getNode(ARMISD::CMOV, dl, VT, - Result, TrueVal, ARMcc2, CCR, Cmp2); + Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; } @@ -3682,6 +3800,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. 
+ if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); @@ -3774,11 +3904,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } -static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::FP_TO_SINT) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -3828,11 +3970,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(Opc, dl, VT, Op); } -static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::SINT_TO_FP) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -4341,7 +4495,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode SetCCOpcode = cast(CC)->get(); SDLoc dl(Op); - if (Op.getOperand(1).getValueType().isFloatingPoint()) { + if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: @@ -4605,6 +4759,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast(Op); + // Use the default (constant pool) lowering for double constants when we have + // an SP-only FPU + if (IsDouble && Subtarget->isFPOnlySP()) + return SDValue(); + // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); int ImmVal = IsDouble ? 
ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); @@ -6308,6 +6467,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) return LowerDYNAMIC_STACKALLOC(Op, DAG); llvm_unreachable("Don't know how to custom lower this!"); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); } } @@ -6428,9 +6589,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) - .addFrameIndex(FI) - .addImm(36)); // &jbuf[1] :: pc + BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) + .addFrameIndex(FI) + .addImm(36); // &jbuf[1] :: pc AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg5, RegState::Kill) @@ -6797,9 +6958,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // N.B. the order the invoke BBs are processed in doesn't matter here. const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector MBBLPads; - for (SmallPtrSet::iterator - I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { - MachineBasicBlock *BB = *I; + for (MachineBasicBlock *BB : InvokeBBs) { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. @@ -7532,12 +7691,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - if (!MI->hasPostISelHook()) { - assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && - "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); - return; - } - const MCInstrDesc *MCID = &MI->getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional @@ -8451,10 +8604,11 @@ static SDValue PerformBFICombine(SDNode *N, /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // vmovrrd(vmovdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::VMOVDRR) + if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); // vmovrrd(load f64) -> (load i32), (load i32) @@ -8667,7 +8821,8 @@ static bool hasNormalLoadOperand(SDNode *N) { /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. static SDValue PerformBUILD_VECTORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI){ + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): // VMOVRRD is introduced when legalizing i64 types. 
It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, @@ -8973,7 +9128,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs+2)); + SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); SmallVector Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -9052,7 +9207,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs+1)); + SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, @@ -9682,10 +9837,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); - case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ISD::STORE: return PerformSTORECombine(N, DCI); - case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); + case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); @@ -10606,7 +10761,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && - "Invalid opcode for Div/Rem lowering"); + "Invalid opcode for Div/Rem lowering"); bool isSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); @@ -10614,10 +10769,10 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { RTLIB::Libcall LC; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC = isSigned ? 
RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; } SDValue InChain = DAG.getEntryNode(); @@ -10675,6 +10830,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, DL); } +SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_EXTEND"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + +SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(0).getValueType() == MVT::f64 && + Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_ROUND"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10702,7 +10882,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return false; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; - if (VT == MVT::f64) + if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } @@ -10829,23 +11009,113 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { - // Loads and stores less than 64-bits are already atomic; ones above that - // are doomed anyway, so defer to the default libcall and blame the OS when - // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit - // anything for those. - bool IsMClass = Subtarget->isMClass(); - if (StoreInst *SI = dyn_cast(Inst)) { - unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - return Size == 64 && !IsMClass; - } else if (LoadInst *LI = dyn_cast(Inst)) { - return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; +bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } + +Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, + ARM_MB::MemBOpt Domain) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + + // First, if the target has no DMB, see what fallback we can use. + if (!Subtarget->hasDataBarrier()) { + // Some ARMv6 cpus can support data barriers with an mcr instruction. + // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get + // here. + if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { + Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); + Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), + Builder.getInt32(0), Builder.getInt32(7), + Builder.getInt32(10), Builder.getInt32(5)}; + return Builder.CreateCall(MCR, args); + } else { + // Instead of using barriers, atomic accesses on these subtargets use + // libcalls. + llvm_unreachable("makeDMB on a target so old that it has no barriers"); + } + } else { + Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); + // Only a full system barrier exists in the M-class architectures. + Domain = Subtarget->isMClass() ? 
ARM_MB::SY : Domain; + Constant *CDomain = Builder.getInt32(Domain); + return Builder.CreateCall(DMB, CDomain); + } +} + +// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html +Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (!getInsertFencesForAtomic()) + return nullptr; + + switch (Ord) { + case NotAtomic: + case Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case Monotonic: + case Acquire: + return nullptr; // Nothing to do + case SequentiallyConsistent: + if (!IsStore) + return nullptr; // Nothing to do + /*FALLTHROUGH*/ + case Release: + case AcquireRelease: + if (Subtarget->isSwift()) + return makeDMB(Builder, ARM_MB::ISHST); + // FIXME: add a comment with a link to documentation justifying this. + else + return makeDMB(Builder, ARM_MB::ISH); + } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); +} + +Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (!getInsertFencesForAtomic()) + return nullptr; + + switch (Ord) { + case NotAtomic: + case Unordered: + llvm_unreachable("Invalid fence: unordered/not-atomic"); + case Monotonic: + case Release: + return nullptr; // Nothing to do + case Acquire: + case AcquireRelease: + case SequentiallyConsistent: + return makeDMB(Builder, ARM_MB::ISH); } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); +} + +// Loads and stores less than 64-bits are already atomic; ones above that +// are doomed anyway, so defer to the default libcall and blame the OS when +// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit +// anything for those. +bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return (Size == 64) && !Subtarget->isMClass(); +} + +// Loads and stores less than 64-bits are already atomic; ones above that +// are doomed anyway, so defer to the default libcall and blame the OS when +// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit +// anything for those. +// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that +// guarantee, see DDI0406C ARM architecture reference manual, +// sections A8.8.72-74 LDRD) +bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); + return (Size == 64) && !Subtarget->isMClass(); +} - // For the real atomic operations, we have ldrex/strex up to 32 bits, - // and up to 64 bits on the non-M profiles - unsigned AtomicLimit = IsMClass ? 32 : 64; - return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; +// For the real atomic operations, we have ldrex/strex up to 32 bits, +// and up to 64 bits on the non-M profiles +bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + return Size <= (Subtarget->isMClass() ? 32U : 64U); } // This has so far only been implemented for MachO. @@ -10853,12 +11123,40 @@ bool ARMTargetLowering::useLoadStackGuardNode() const { return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO; } +bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const { + // If we do not have NEON, vector types are not natively supported. 
+  if (!Subtarget->hasNEON())
+    return false;
+
+  // Floating point values and vector values map to the same register file.
+  // Therefore, although we could do a store extract of a vector type, it is
+  // better to leave these as floats, since we have more freedom in the
+  // addressing modes for them.
+  if (VectorTy->isFPOrFPVectorTy())
+    return false;
+
+  // If the index is unknown at compile time, this is very expensive to lower
+  // and it is not possible to combine the store with the extract.
+  if (!isa<ConstantInt>(Idx))
+    return false;
+
+  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+  // We can do a store + vector extract on any vector that fits perfectly in a D
+  // or Q register.
+  if (BitWidth == 64 || BitWidth == 128) {
+    Cost = 0;
+    return true;
+  }
+  return false;
+}
+
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
   Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
-  bool IsAcquire =
-      Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+  bool IsAcquire = isAtLeastAcquire(Ord);
 
   // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
   // intrinsic must return {i32, i32} and we have to recombine them into a
@@ -10894,8 +11192,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                                Value *Addr,
                                                AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  bool IsRelease =
-      Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+  bool IsRelease = isAtLeastRelease(Ord);
 
   // Since the intrinsics must have legal type, the i64 intrinsics take two
   // parameters: "i32, i32". We must marshal Val into the appropriate form