X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=e8f3ab65bdbeea7252c52b5228342f1187e7f29f;hb=92989cbe8452a9406f6e0d3c5709b5745f27158a;hp=49f8918b8ddd8d7523c1dc6923e3b3c679091139;hpb=624894b4d2e29559e7521cfd2f1946f9de1802e1;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 49f8918b8dd..e8f3ab65bdb 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -242,6 +242,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setCmpLibcallCC(LC.Op, LC.Cond); } } + + // Set the correct calling convention for ARMv7k WatchOS. It's just + // AAPCS_VFP for functions as simple as libcalls. + if (Subtarget->isTargetWatchOS()) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) + setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP); + } } // These libcalls are not available in 32-bit. @@ -249,8 +256,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); - if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && - !Subtarget->isTargetWindows()) { + // RTLIB + if (Subtarget->isAAPCS_ABI() && + (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || + Subtarget->isTargetAndroid())) { static const struct { const RTLIB::Libcall Op; const char * const Name; @@ -338,12 +347,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, - - // Memory operations - // RTABI chapter 4.3.4 - { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, - { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, - { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, }; for (const auto &LC : LibraryCalls) { @@ -352,6 +355,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } + + // EABI dependent RTLIB + if (TM.Options.EABIVersion == EABI::EABI4 || + TM.Options.EABIVersion == EABI::EABI5) { + static const struct { + const RTLIB::Libcall Op; + const char *const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } MemOpsLibraryCalls[] = { + // Memory operations + // RTABI chapter 4.3.4 + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : MemOpsLibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + } } if (Subtarget->isTargetWindows()) { @@ -368,6 +395,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP }, }; for (const auto &LC : LibraryCalls) { @@ -377,8 +408,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } // Use divmod compiler-rt calls for iOS 5.0 and later. - if (Subtarget->getTargetTriple().isiOS() && - !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { + if (Subtarget->isTargetWatchOS() || + (Subtarget->isTargetIOS() && + !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } @@ -717,8 +749,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBE, MVT::i32, Custom); } + if (!Subtarget->isThumb1Only()) + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + // ARM does not have ROTL. - setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + } setOperationAction(ISD::CTTZ, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) @@ -746,14 +785,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i32, LibCall); } - if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) { - setOperationAction(ISD::SDIV, MVT::i32, Custom); - setOperationAction(ISD::UDIV, MVT::i32, Custom); - - setOperationAction(ISD::SDIV, MVT::i64, Custom); - setOperationAction(ISD::UDIV, MVT::i64, Custom); - } - setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) @@ -801,13 +832,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (!Subtarget->isTargetMachO()) { - // Non-MachO platforms may return values in these registers via the - // personality function. - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); - } - if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else @@ -877,7 +901,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); - if (Subtarget->isTargetDarwin()) + if (Subtarget->useSjLjEH()) setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); setOperationAction(ISD::SETCC, MVT::i32, Expand); @@ -937,7 +961,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->getTargetTriple().isiOS()) { + if (Subtarget->isTargetWatchOS()) { + setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); + setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); + } + if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -1100,8 +1128,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::RBIT: return "ARMISD::RBIT"; - case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -2768,7 +2794,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, case Intrinsic::arm_rbit: { assert(Op.getOperand(1).getValueType() == MVT::i32 && "RBIT intrinsic must have i32 type!"); - return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); + return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -3257,13 +3283,9 @@ static bool isFloatingPointZero(SDValue Op) { // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) // created by LowerConstantFP(). SDValue BitcastOp = Op->getOperand(0); - if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) { - SDValue MoveOp = BitcastOp->getOperand(0); - if (MoveOp->getOpcode() == ISD::TargetConstant && - cast(MoveOp)->getZExtValue() == 0) { - return true; - } - } + if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && + isNullConstant(BitcastOp->getOperand(0))) + return true; } return false; } @@ -4347,7 +4369,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, if (!ST->hasV6T2Ops()) return SDValue(); - SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); + SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0)); return DAG.getNode(ISD::CTLZ, dl, VT, rbit); } @@ -4508,8 +4530,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, "Unknown shift to lower!"); // We only lower SRA, SRL of 1 here, all others use generic lowering. - if (!isa(N->getOperand(1)) || - cast(N->getOperand(1))->getZExtValue() != 1) + if (!isOneConstant(N->getOperand(1))) return SDValue(); // If we are in thumb mode, we don't have RRX. @@ -5391,16 +5412,14 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // just use VDUPLANE. We can only do this if the lane being extracted // is at a constant index, as the VDUP from lane instructions only have // constant-index forms. + ConstantSDNode *constIndex; if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa(Value->getOperand(1))) { + (constIndex = dyn_cast(Value->getOperand(1)))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. We will insert the element // such that the register coalescer will remove unnecessary copies. if (VT != Value->getOperand(0).getValueType()) { - ConstantSDNode *constIndex; - constIndex = dyn_cast(Value->getOperand(1)); - assert(constIndex && "The index is not a constant!"); unsigned index = constIndex->getAPIntValue().getLimitedValue() % VT.getVectorNumElements(); N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, @@ -6572,27 +6591,33 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. - StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); - - // Create stack object for sret. + Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr); auto &DL = DAG.getDataLayout(); - const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); - const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); ArgListTy Args; - ArgListEntry Entry; - - Entry.Node = SRet; - Entry.Ty = RetTy->getPointerTo(); - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isSRet = true; - Args.push_back(Entry); + bool ShouldUseSRet = Subtarget->isAPCS_ABI(); + SDValue SRet; + if (ShouldUseSRet) { + // Create stack object for sret. + const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); + const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL)); + + ArgListEntry Entry; + Entry.Node = SRet; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isSRet = true; + Args.push_back(Entry); + RetTy = Type::getVoidTy(*DAG.getContext()); + } + ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; @@ -6601,16 +6626,21 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; + RTLIB::Libcall LC = + (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32; + CallingConv::ID CC = getLibcallCallingConv(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, - std::move(Args), 0) - .setDiscardResult(); - + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setCallee(CC, RetTy, Callee, std::move(Args), 0) + .setDiscardResult(ShouldUseSRet); std::pair CallResult = LowerCallTo(CLI); + if (!ShouldUseSRet) + return CallResult.first; + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo(), false, false, false, 0); @@ -6794,14 +6824,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); - case ISD::SDIV: - if (Subtarget->isTargetWindows()) - return LowerDIV_Windows(Op, DAG, /* Signed */ true); - return LowerSDIV(Op, DAG); - case ISD::UDIV: - if (Subtarget->isTargetWindows()) - return LowerDIV_Windows(Op, DAG, /* Signed */ false); - return LowerUDIV(Op, DAG); + case ISD::SDIV: return LowerSDIV(Op, DAG); + case ISD::UDIV: return LowerUDIV(Op, DAG); case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: @@ -7322,7 +7346,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, } } - BB->addSuccessor(DispatchBB); + BB->addSuccessor(DispatchBB, BranchProbability::getZero()); // Find the invoke call and mark all of the callee-saved registers as // 'implicit defined' so that they're spilled. This prevents code from @@ -8166,10 +8190,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Helper function that checks if N is a null or all ones constant. static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { - ConstantSDNode *C = dyn_cast(N); - if (!C) - return false; - return AllOnes ? C->isAllOnesValue() : C->isNullValue(); + return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); } // Return true if N is conditionally 0 or all ones. @@ -8991,12 +9012,88 @@ static SDValue PerformXORCombine(SDNode *N, return SDValue(); } -/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff -/// the bits being cleared by the AND are not demanded by the BFI. +// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it, +// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and +// their position in "to" (Rd). +static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) { + assert(N->getOpcode() == ARMISD::BFI); + + SDValue From = N->getOperand(1); + ToMask = ~cast(N->getOperand(2))->getAPIntValue(); + FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation()); + + // If the Base came from a SHR #C, we can deduce that it is really testing bit + // #C in the base of the SHR. + if (From->getOpcode() == ISD::SRL && + isa(From->getOperand(1))) { + APInt Shift = cast(From->getOperand(1))->getAPIntValue(); + assert(Shift.getLimitedValue() < 32 && "Shift too large!"); + FromMask <<= Shift.getLimitedValue(31); + From = From->getOperand(0); + } + + return From; +} + +// If A and B contain one contiguous set of bits, does A | B == A . B? +// +// Neither A nor B must be zero. +static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) { + unsigned LastActiveBitInA = A.countTrailingZeros(); + unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1; + return LastActiveBitInA - 1 == FirstActiveBitInB; +} + +static SDValue FindBFIToCombineWith(SDNode *N) { + // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with, + // if one exists. + APInt ToMask, FromMask; + SDValue From = ParseBFI(N, ToMask, FromMask); + SDValue To = N->getOperand(0); + + // Now check for a compatible BFI to merge with. We can pass through BFIs that + // aren't compatible, but not if they set the same bit in their destination as + // we do (or that of any BFI we're going to combine with). + SDValue V = To; + APInt CombinedToMask = ToMask; + while (V.getOpcode() == ARMISD::BFI) { + APInt NewToMask, NewFromMask; + SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask); + if (NewFrom != From) { + // This BFI has a different base. Keep going. + CombinedToMask |= NewToMask; + V = V.getOperand(0); + continue; + } + + // Do the written bits conflict with any we've seen so far? + if ((NewToMask & CombinedToMask).getBoolValue()) + // Conflicting bits - bail out because going further is unsafe. + return SDValue(); + + // Are the new bits contiguous when combined with the old bits? + if (BitsProperlyConcatenate(ToMask, NewToMask) && + BitsProperlyConcatenate(FromMask, NewFromMask)) + return V; + if (BitsProperlyConcatenate(NewToMask, ToMask) && + BitsProperlyConcatenate(NewFromMask, FromMask)) + return V; + + // We've seen a write to some bits, so track it. + CombinedToMask |= NewToMask; + // Keep going... + V = V.getOperand(0); + } + + return SDValue(); +} + static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N1 = N->getOperand(1); if (N1.getOpcode() == ISD::AND) { + // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff + // the bits being cleared by the AND are not demanded by the BFI. ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N11C) return SDValue(); @@ -9012,6 +9109,38 @@ static SDValue PerformBFICombine(SDNode *N, return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); + } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) { + // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes. + // Keep track of any consecutive bits set that all come from the same base + // value. We can combine these together into a single BFI. + SDValue CombineBFI = FindBFIToCombineWith(N); + if (CombineBFI == SDValue()) + return SDValue(); + + // We've found a BFI. + APInt ToMask1, FromMask1; + SDValue From1 = ParseBFI(N, ToMask1, FromMask1); + + APInt ToMask2, FromMask2; + SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2); + assert(From1 == From2); + (void)From2; + + // First, unlink CombineBFI. + DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0)); + // Then create a new BFI, combining the two together. + APInt NewFromMask = FromMask1 | FromMask2; + APInt NewToMask = ToMask1 | ToMask2; + + EVT VT = N->getValueType(0); + SDLoc dl(N); + + if (NewFromMask[0] == 0) + From1 = DCI.DAG.getNode( + ISD::SRL, dl, VT, From1, + DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT)); + return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1, + DCI.DAG.getConstant(~NewToMask, dl, VT)); } return SDValue(); } @@ -10201,6 +10330,125 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, + APInt &KnownOne) { + if (Op.getOpcode() == ARMISD::BFI) { + // Conservatively, we can recurse down the first operand + // and just mask out all affected bits. + computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); + + // The operand to BFI is already a mask suitable for removing the bits it + // sets. + ConstantSDNode *CI = cast(Op.getOperand(2)); + APInt Mask = CI->getAPIntValue(); + KnownZero &= Mask; + KnownOne &= Mask; + return; + } + if (Op.getOpcode() == ARMISD::CMOV) { + APInt KZ2(KnownZero.getBitWidth(), 0); + APInt KO2(KnownOne.getBitWidth(), 0); + computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne); + computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2); + + KnownZero &= KZ2; + KnownOne &= KO2; + return; + } + return DAG.computeKnownBits(Op, KnownZero, KnownOne); +} + +SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { + // If we have a CMOV, OR and AND combination such as: + // if (x & CN) + // y |= CM; + // + // And: + // * CN is a single bit; + // * All bits covered by CM are known zero in y + // + // Then we can convert this into a sequence of BFI instructions. This will + // always be a win if CM is a single bit, will always be no worse than the + // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is + // three bits (due to the extra IT instruction). + + SDValue Op0 = CMOV->getOperand(0); + SDValue Op1 = CMOV->getOperand(1); + auto CCNode = cast(CMOV->getOperand(2)); + auto CC = CCNode->getAPIntValue().getLimitedValue(); + SDValue CmpZ = CMOV->getOperand(4); + + // The compare must be against zero. + if (!isNullConstant(CmpZ->getOperand(1))) + return SDValue(); + + assert(CmpZ->getOpcode() == ARMISD::CMPZ); + SDValue And = CmpZ->getOperand(0); + if (And->getOpcode() != ISD::AND) + return SDValue(); + ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); + if (!AndC || !AndC->getAPIntValue().isPowerOf2()) + return SDValue(); + SDValue X = And->getOperand(0); + + if (CC == ARMCC::EQ) { + // We're performing an "equal to zero" compare. Swap the operands so we + // canonicalize on a "not equal to zero" compare. + std::swap(Op0, Op1); + } else { + assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?"); + } + + if (Op1->getOpcode() != ISD::OR) + return SDValue(); + + ConstantSDNode *OrC = dyn_cast(Op1->getOperand(1)); + if (!OrC) + return SDValue(); + SDValue Y = Op1->getOperand(0); + + if (Op0 != Y) + return SDValue(); + + // Now, is it profitable to continue? + APInt OrCI = OrC->getAPIntValue(); + unsigned Heuristic = Subtarget->isThumb() ? 3 : 2; + if (OrCI.countPopulation() > Heuristic) + return SDValue(); + + // Lastly, can we determine that the bits defined by OrCI + // are zero in Y? + APInt KnownZero, KnownOne; + computeKnownBits(DAG, Y, KnownZero, KnownOne); + if ((OrCI & KnownZero) != OrCI) + return SDValue(); + + // OK, we can do the combine. + SDValue V = Y; + SDLoc dl(X); + EVT VT = X.getValueType(); + unsigned BitInX = AndC->getAPIntValue().logBase2(); + + if (BitInX != 0) { + // We must shift X first. + X = DAG.getNode(ISD::SRL, dl, VT, X, + DAG.getConstant(BitInX, dl, VT)); + } + + for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits(); + BitInY < NumActiveBits; ++BitInY) { + if (OrCI[BitInY] == 0) + continue; + APInt Mask(VT.getSizeInBits(), 0); + Mask.setBit(BitInY); + V = DAG.getNode(ARMISD::BFI, dl, VT, V, X, + // Confusingly, the operand is an *inverted* mask. + DAG.getConstant(~Mask, dl, VT)); + } + + return V; +} + /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. SDValue ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { @@ -10219,6 +10467,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { ARMCC::CondCodes CC = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); + // BFI is only available on V6T2+. + if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) { + SDValue R = PerformCMOVToBFICombine(N, DAG); + if (R) + return R; + } + // Simplify // mov r1, r0 // cmp r1, x @@ -11689,6 +11944,14 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, return false; } +bool ARMTargetLowering::isCheapToSpeculateCttz() const { + return Subtarget->hasV6T2Ops(); +} + +bool ARMTargetLowering::isCheapToSpeculateCtlz() const { + return Subtarget->hasV6T2Ops(); +} + Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); @@ -12005,3 +12268,17 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); return IsHA || IsIntArray; } + +unsigned ARMTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + // Platforms which do not use SjLj EH may return values in these registers + // via the personality function. + return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0; +} + +unsigned ARMTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + // Platforms which do not use SjLj EH may return values in these registers + // via the personality function. + return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; +}