X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=fc57d83ce1eaec0708a82778b62d617357b03eda;hp=ebc63e0db245cbaace0cf63c913525ea520b4c78;hb=d0fdbdba37f2bc234e72b8607309e48bf0dff97a;hpb=5a1af3657c51ec2b5d2a71b3dd877acc4a4d60aa

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ebc63e0db24..fc57d83ce1e 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -143,15 +143,10 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
   setOperationAction(ISD::UREM, VT, Expand);
   setOperationAction(ISD::FREM, VT, Expand);
 
-  if (VT.isInteger()) {
-    setOperationAction(ISD::SABSDIFF, VT, Legal);
-    setOperationAction(ISD::UABSDIFF, VT, Legal);
-  }
   if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
     for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
       setOperationAction(Opcode, VT, Legal);
-
 }
 
 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -242,6 +237,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
         setCmpLibcallCC(LC.Op, LC.Cond);
       }
     }
+
+    // Set the correct calling convention for ARMv7k WatchOS. It's just
+    // AAPCS_VFP for functions as simple as libcalls.
+    if (Subtarget->isTargetWatchOS()) {
+      for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+        setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
+    }
   }
 
   // These libcalls are not available in 32-bit.
@@ -249,8 +251,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setLibcallName(RTLIB::SRL_I128, nullptr);
   setLibcallName(RTLIB::SRA_I128, nullptr);
 
-  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
-      !Subtarget->isTargetWindows()) {
+  // RTLIB
+  if (Subtarget->isAAPCS_ABI() &&
+      (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
+       Subtarget->isTargetAndroid())) {
     static const struct {
       const RTLIB::Libcall Op;
       const char * const Name;
@@ -338,12 +342,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
       { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
       { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
-      // Memory operations
-      // RTABI chapter 4.3.4
-      { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-      { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-      { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
     };
 
     for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
      if (LC.Cond != ISD::SETCC_INVALID)
        setCmpLibcallCC(LC.Op, LC.Cond);
    }
@@ -352,6 +350,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
        setCmpLibcallCC(LC.Op, LC.Cond);
    }
+
+    // EABI dependent RTLIB
+    if (TM.Options.EABIVersion == EABI::EABI4 ||
+        TM.Options.EABIVersion == EABI::EABI5) {
+      static const struct {
+        const RTLIB::Libcall Op;
+        const char *const Name;
+        const CallingConv::ID CC;
+        const ISD::CondCode Cond;
+      } MemOpsLibraryCalls[] = {
+        // Memory operations
+        // RTABI chapter 4.3.4
+        { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+        { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+        { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      };
+
+      for (const auto &LC : MemOpsLibraryCalls) {
+        setLibcallName(LC.Op, LC.Name);
+        setLibcallCallingConv(LC.Op, LC.CC);
+        if (LC.Cond != ISD::SETCC_INVALID)
+          setCmpLibcallCC(LC.Op, LC.Cond);
+      }
+    }
   }
 
   if (Subtarget->isTargetWindows()) {
@@ -368,7 +390,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
       { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
       { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
-      { RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP },
       { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP },
       { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
@@ -382,8 +403,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   }
 
   // Use divmod compiler-rt calls for iOS 5.0 and later.
-  if (Subtarget->getTargetTriple().isiOS() &&
-      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+  if (Subtarget->isTargetWatchOS() ||
+      (Subtarget->isTargetIOS() &&
+       !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
   }
@@ -401,6 +423,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
   }
 
+  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
+  // a __gnu_ prefix (which is the default).
+  if (Subtarget->isTargetAEABI()) {
+    setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
+    setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
+    setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+  }
+
   if (Subtarget->isThumb1Only())
     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
   else
@@ -697,7 +727,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
   }
   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
-      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+      || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
     setOperationAction(ISD::MULHS, MVT::i32, Expand);
 
   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -714,8 +744,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SUBE, MVT::i32, Custom);
   }
 
+  if (!Subtarget->isThumb1Only())
+    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
   // ARM does not have ROTL.
-  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
+  setOperationAction(ISD::ROTL, MVT::i32, Expand);
+  for (MVT VT : MVT::vector_valuetypes()) {
+    setOperationAction(ISD::ROTL, VT, Expand);
+    setOperationAction(ISD::ROTR, VT, Expand);
+  }
   setOperationAction(ISD::CTTZ, MVT::i32, Custom);
   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
@@ -725,7 +762,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
   setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
 
-  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+  // @llvm.readcyclecounter requires the Performance Monitors extension.
+  // Default to the 0 expansion on unsupported platforms.
+  // FIXME: Technically there are older ARM CPUs that have
+  // implementation-specific ways of obtaining this information.
+  if (Subtarget->hasPerfMon())
+    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
 
   // Only ARMv6 has BSWAP.
   if (!Subtarget->hasV6Ops())
@@ -734,15 +776,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
     // These are expanded into libcalls if the cpu doesn't have HW divider.
-    setOperationAction(ISD::SDIV, MVT::i32, Expand);
-    setOperationAction(ISD::UDIV, MVT::i32, Expand);
+    setOperationAction(ISD::SDIV, MVT::i32, LibCall);
+    setOperationAction(ISD::UDIV, MVT::i32, LibCall);
   }
 
-  // FIXME: Also set divmod for SREM on EABI/androideabi
   setOperationAction(ISD::SREM,  MVT::i32, Expand);
   setOperationAction(ISD::UREM,  MVT::i32, Expand);
   // Register based DivRem for AEABI (RTABI 4.2)
   if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) {
+    setOperationAction(ISD::SREM, MVT::i64, Custom);
+    setOperationAction(ISD::UREM, MVT::i64, Custom);
+
     setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
     setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
     setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
@@ -770,7 +814,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
   setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
-  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
@@ -784,13 +827,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
 
-  if (!Subtarget->isTargetMachO()) {
-    // Non-MachO platforms may return values in these registers via the
-    // personality function.
-    setExceptionPointerRegister(ARM::R0);
-    setExceptionSelectorRegister(ARM::R1);
-  }
-
   if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
   else
@@ -860,7 +896,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
   setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
-  if (Subtarget->isTargetDarwin())
+  if (Subtarget->useSjLjEH())
     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
 
   setOperationAction(ISD::SETCC, MVT::i32, Expand);
@@ -920,7 +956,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   if (Subtarget->hasSinCos()) {
     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
     setLibcallName(RTLIB::SINCOS_F64, "sincos");
-    if (Subtarget->getTargetTriple().isiOS()) {
+    if (Subtarget->isTargetWatchOS()) {
+      setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
+      setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
+    }
+    if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
       // For iOS, we don't want to the normal expansion of a libcall to
       // sincos. We want to issue a libcall to __sincos_stret.
       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -1083,8 +1123,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::CMOV:          return "ARMISD::CMOV";
 
-  case ARMISD::RBIT:          return "ARMISD::RBIT";
-
   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
   case ARMISD::RRX:           return "ARMISD::RRX";
@@ -1112,6 +1150,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
 
   case ARMISD::WIN__CHKSTK:   return "ARMISD:::WIN__CHKSTK";
+  case ARMISD::WIN__DBZCHK:   return "ARMISD::WIN__DBZCHK";
 
   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
@@ -1167,6 +1206,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
   case ARMISD::VBSL:          return "ARMISD::VBSL";
+  case ARMISD::MEMCPY:        return "ARMISD::MEMCPY";
   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
@@ -2045,6 +2085,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   CallingConv::ID CallerCC = CallerF->getCallingConv();
   bool CCMatch = CallerCC == CalleeCC;
 
+  assert(Subtarget->supportsTailCall());
+
   // Look for obvious safe cases to perform tail call optimization that do not
   // require ABI changes. This is what gcc calls sibcall.
 
@@ -2064,26 +2106,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   if (isCalleeStructRet || isCallerStructRet)
     return false;
 
-  // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
-  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
-  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
-  // support in the assembler and linker to be used. This would need to be
-  // fixed to fully support tail calls in Thumb1.
-  //
-  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
-  // LR. This means if we need to reload LR, it takes an extra instructions,
-  // which outweighs the value of the tail call; but here we don't know yet
-  // whether LR is going to be used. Probably the right approach is to
-  // generate the tail call here and turn it back into CALL/RET in
-  // emitEpilogue if LR is used.
-
-  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
-  // but we need to make sure there are enough registers; the only valid
-  // registers are the 4 used for parameters. We don't currently do this
-  // case.
-  if (Subtarget->isThumb1Only())
-    return false;
-
   // Externally-defined functions with weak linkage should not be
   // tail-called on ARM when the OS does not support dynamic
   // pre-emption of symbols, as the AAELF spec requires normal calls
@@ -2431,7 +2453,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
   if (!CI->isTailCall() || Attr.getValueAsString() == "true")
     return false;
 
-  return !Subtarget->isThumb1Only();
+  return true;
 }
 
 // Trying to write a 64 bit value so need to split into two 32 bit values first,
@@ -2508,6 +2530,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
 }
 
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+///     + "extern __thread" declaration.
+///     + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+///     movw rT1, :lower16:_var$non_lazy_ptr
+///     movt rT1, :upper16:_var$non_lazy_ptr
+///     ldr r0, [rT1]
+///     ldr rT2, [r0]
+///     blx rT2
+///     [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+  SDLoc DL(Op);
+
+  // First step is to get the address of the actual global symbol. This is where
+  // the TLS descriptor lives.
+  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+  // The first entry in the descriptor is a function pointer that we must call
+  // to obtain the address of the variable.
+  SDValue Chain = DAG.getEntryNode();
+  SDValue FuncTLVGet =
+      DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+                  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+                  false, true, true, 4);
+  Chain = FuncTLVGet.getValue(1);
+
+  MachineFunction &F = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = F.getFrameInfo();
+  MFI->setAdjustsStack(true);
+
+  // TLS calls preserve all registers except those that absolutely must be
+  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+  // silly).
+  auto TRI =
+      getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+  auto ARI = static_cast<const ARMBaseRegisterInfo *>(TRI);
+  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+  // Finally, we can make the call. This is just a degenerate version of a
+  // normal AArch64 call node: r0 takes the address of the descriptor, and
+  // returns the address of the variable in this thread.
+  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
+  Chain =
+      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+                  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
+                  DAG.getRegisterMask(Mask), Chain.getValue(1));
+  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
+}
+
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
 SDValue
 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@@ -2609,9 +2697,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
 SDValue
 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+  if (Subtarget->isTargetDarwin())
+    return LowerGlobalTLSAddressDarwin(Op, DAG);
+
   // TODO: implement the "local dynamic" model
-  assert(Subtarget->isTargetELF() &&
-         "TLS not implemented for non-ELF targets");
+  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   if (DAG.getTarget().Options.EmulatedTLS)
     return LowerToTLSEmulatedModel(GA, DAG);
@@ -2635,10 +2725,19 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
-    ARMConstantPoolValue *CPV =
-      ARMConstantPoolConstant::Create(GV,
-                                      UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+    bool UseGOT_PREL =
+        !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+    MachineFunction &MF = DAG.getMachineFunction();
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+    EVT PtrVT = getPointerTy(DAG.getDataLayout());
+    SDLoc dl(Op);
+    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+        GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+        UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+        /*AddCurrentAddress=*/UseGOT_PREL);
     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
     SDValue Result = DAG.getLoad(
@@ -2646,9 +2745,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
         MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
         false, false, false, 0);
     SDValue Chain = Result.getValue(1);
-    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
-    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
-    if (!UseGOTOFF)
+    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+    if (UseGOT_PREL)
       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                            MachinePointerInfo::getGOT(DAG.getMachineFunction()),
                            false, false, false, 0);
@@ -2725,29 +2824,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
   return Result;
 }
 
-SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
-                                                    SelectionDAG &DAG) const {
-  assert(Subtarget->isTargetELF() &&
-         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
-  MachineFunction &MF = DAG.getMachineFunction();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-  EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  SDLoc dl(Op);
-  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
-  ARMConstantPoolValue *CPV =
-    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
-                                  ARMPCLabelIndex, PCAdj);
-  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
-  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
-  SDValue Result =
-      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
-                  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
-                  false, false, false, 0);
-  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
-  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
-}
-
 SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -2781,7 +2857,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
   case Intrinsic::arm_rbit: {
     assert(Op.getOperand(1).getValueType() == MVT::i32 &&
            "RBIT intrinsic must have i32 type!");
-    return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
+    return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
   }
   case Intrinsic::arm_thread_pointer: {
     EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -3220,9 +3296,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                "Byval arguments cannot be implicit");
         unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
 
-        int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
-                                        CurByValIndex, VA.getLocMemOffset(),
-                                        Flags.getByValSize());
+        int FrameIndex = StoreByValRegs(
+            CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
+            VA.getLocMemOffset(), Flags.getByValSize());
         InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
         CCInfo.nextInRegsParam();
       } else {
@@ -3270,13 +3346,9 @@ static bool isFloatingPointZero(SDValue Op) {
     // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
     // created by LowerConstantFP().
     SDValue BitcastOp = Op->getOperand(0);
-    if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
-      SDValue MoveOp = BitcastOp->getOperand(0);
-      if (MoveOp->getOpcode() == ISD::TargetConstant &&
-          cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
-        return true;
-      }
-    }
+    if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
+        isNullConstant(BitcastOp->getOperand(0)))
+      return true;
   }
   return false;
 }
@@ -3913,7 +3985,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
     else
       LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
                               Op.getValueType());
-    return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+    return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
                        /*isSigned*/ false, SDLoc(Op)).first;
   }
 
@@ -3965,7 +4037,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
     else
      LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
                              Op.getValueType());
-    return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+    return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
                        /*isSigned*/ false, SDLoc(Op)).first;
  }
 
@@ -4130,6 +4202,56 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
   Results.push_back(Read.getOperand(0));
 }
 
+/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
+/// When \p DstVT, the destination type of \p BC, is on the vector
+/// register bank and the source of bitcast, \p Op, operates on the same bank,
+/// it might be possible to combine them, such that everything stays on the
+/// vector register bank.
+/// \p return The node that would replace \p BT, if the combine
+/// is possible.
+static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
+                                                SelectionDAG &DAG) {
+  SDValue Op = BC->getOperand(0);
+  EVT DstVT = BC->getValueType(0);
+
+  // The only vector instruction that can produce a scalar (remember,
+  // since the bitcast was about to be turned into VMOVDRR, the source
+  // type is i64) from a vector is EXTRACT_VECTOR_ELT.
+  // Moreover, we can do this combine only if there is one use.
+  // Finally, if the destination type is not a vector, there is not
+  // much point on forcing everything on the vector bank.
+  if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      !Op.hasOneUse())
+    return SDValue();
+
+  // If the index is not constant, we will introduce an additional
+  // multiply that will stick.
+  // Give up in that case.
+  ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!Index)
+    return SDValue();
+  unsigned DstNumElt = DstVT.getVectorNumElements();
+
+  // Compute the new index.
+  const APInt &APIntIndex = Index->getAPIntValue();
+  APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
+  NewIndex *= APIntIndex;
+  // Check if the new constant index fits into i32.
+  if (NewIndex.getBitWidth() > 32)
+    return SDValue();
+
+  // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
+  // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
+  SDLoc dl(Op);
+  SDValue ExtractSrc = Op.getOperand(0);
+  EVT VecVT = EVT::getVectorVT(
+      *DAG.getContext(), DstVT.getScalarType(),
+      ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
+  SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
+                     DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
+}
+
 /// ExpandBITCAST - If the target supports VFP, this function is called to
 /// expand a bit convert where either the source or destination type is i64 to
 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
@@ -4149,6 +4271,11 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
 
   // Turn i64->f64 into VMOVDRR.
   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+    // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
+    // if we can combine the bitcast with its source.
+    if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
+      return Val;
+
     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                              DAG.getConstant(0, dl, MVT::i32));
     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
@@ -4360,7 +4487,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
     if (!ST->hasV6T2Ops())
       return SDValue();
 
-    SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+    SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
     return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
   }
 
@@ -4521,8 +4648,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
          "Unknown shift to lower!");
 
   // We only lower SRA, SRL of 1 here, all others use generic lowering.
-  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
-      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+  if (!isOneConstant(N->getOperand(1)))
     return SDValue();
 
   // If we are in thumb mode, we don't have RRX.
@@ -4560,6 +4686,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
   SDLoc dl(Op);
 
+  if (CmpVT.getVectorElementType() == MVT::i64)
+    // 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
+    // but it's possible that our operands are 64-bit but our result is 32-bit.
+    // Bail in this case.
+    return SDValue();
+
   if (Op1.getValueType().isFloatingPoint()) {
     switch (SetCCOpcode) {
     default: llvm_unreachable("Illegal FP comparison");
@@ -5037,10 +5169,16 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   if (M.size() != NumElts && M.size() != NumElts*2)
     return false;
 
-  // If the mask is twice as long as the result then we need to check the upper
-  // and lower parts of the mask
+  // If the mask is twice as long as the input vector then we need to check the
+  // upper and lower parts of the mask with a matching value for WhichResult
+  // FIXME: A mask with only even values will be rejected in case the first
+  // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
+  // M[0] is used to determine WhichResult
   for (unsigned i = 0; i < M.size(); i += NumElts) {
-    WhichResult = M[i] == 0 ? 0 : 1;
+    if (M.size() == NumElts * 2)
+      WhichResult = i / NumElts;
+    else
+      WhichResult = M[i] == 0 ? 0 : 1;
     for (unsigned j = 0; j < NumElts; j += 2) {
       if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
          (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
@@ -5067,7 +5205,10 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
     return false;
 
   for (unsigned i = 0; i < M.size(); i += NumElts) {
-    WhichResult = M[i] == 0 ? 0 : 1;
+    if (M.size() == NumElts * 2)
+      WhichResult = i / NumElts;
+    else
+      WhichResult = M[i] == 0 ? 0 : 1;
     for (unsigned j = 0; j < NumElts; j += 2) {
       if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
          (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
@@ -5389,16 +5530,14 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     // just use VDUPLANE. We can only do this if the lane being extracted
     // is at a constant index, as the VDUP from lane instructions only have
     // constant-index forms.
+    ConstantSDNode *constIndex;
     if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-        isa<ConstantSDNode>(Value->getOperand(1))) {
+        (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
       // We need to create a new undef vector to use for the VDUPLANE if the
       // size of the vector from which we get the value is different than the
      // size of the vector that we need to create. We will insert the element
      // such that the register coalescer will remove unnecessary copies.
      if (VT != Value->getOperand(0).getValueType()) {
-        ConstantSDNode *constIndex;
-        constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
-        assert(constIndex && "The index is not a constant!");
        unsigned index = constIndex->getAPIntValue().getLimitedValue() %
                             VT.getVectorNumElements();
        N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,
@@ -5493,13 +5632,6 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// getExtFactor - Determine the adjustment factor for the position when
-/// generating an "extract from vector registers" instruction.
-static unsigned getExtFactor(SDValue &V) {
-  EVT EltType = V.getValueType().getVectorElementType();
-  return EltType.getSizeInBits() / 8;
-}
-
 // Gather data to see if the operation can be modelled as a
 // shuffle in combination with VEXTs.
 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
@@ -5541,6 +5673,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
       // A shuffle can only come from building a vector from various
      // elements of other vectors.
      return SDValue();
+    } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
+      // Furthermore, shuffles require a constant mask, whereas extractelts
+      // accept variable indices.
+      return SDValue();
     }
 
     // Add this element source to the list if it's not already there.
@@ -5626,11 +5762,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
       SDValue VEXTSrc2 =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
                      DAG.getConstant(NumSrcElts, dl, MVT::i32));
-      unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
 
       Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
                                    VEXTSrc2,
-                                   DAG.getConstant(Imm, dl, MVT::i32));
+                                   DAG.getConstant(Src.MinElt, dl, MVT::i32));
       Src.WindowBase = -Src.MinElt;
     }
   }
@@ -6359,6 +6494,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
 
 static SDValue
 LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
+
   // Convert to float
   // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
   // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@@ -6389,6 +6526,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
 
 static SDValue
 LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
+
   SDValue N2;
   // Convert to float.
   // float4 yf = vcvt_f32_s32(vmovl_s16(y));
@@ -6461,6 +6600,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
 }
 
 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
   EVT VT = Op.getValueType();
   assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
          "unexpected type for custom-lowering ISD::UDIV");
@@ -6569,45 +6709,56 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   auto PtrVT = getPointerTy(DAG.getDataLayout());
 
   MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // Pair of floats / doubles used to pass the result.
-  StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
-
-  // Create stack object for sret.
+  Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
   auto &DL = DAG.getDataLayout();
-  const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
-  const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
-  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
-  SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
 
   ArgListTy Args;
-  ArgListEntry Entry;
-
-  Entry.Node = SRet;
-  Entry.Ty = RetTy->getPointerTo();
-  Entry.isSExt = false;
-  Entry.isZExt = false;
-  Entry.isSRet = true;
-  Args.push_back(Entry);
+  bool ShouldUseSRet = Subtarget->isAPCS_ABI();
+  SDValue SRet;
+  if (ShouldUseSRet) {
+    // Create stack object for sret.
+    const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
+    const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
+    int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+    SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
+
+    ArgListEntry Entry;
+    Entry.Node = SRet;
+    Entry.Ty = RetTy->getPointerTo();
+    Entry.isSExt = false;
+    Entry.isZExt = false;
+    Entry.isSRet = true;
+    Args.push_back(Entry);
+    RetTy = Type::getVoidTy(*DAG.getContext());
+  }
 
+  ArgListEntry Entry;
   Entry.Node = Arg;
   Entry.Ty = ArgTy;
   Entry.isSExt = false;
   Entry.isZExt = false;
   Args.push_back(Entry);
 
-  const char *LibcallName = (ArgVT == MVT::f64)
-    ? "__sincos_stret" : "__sincosf_stret";
+  const char *LibcallName =
+      (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
+  RTLIB::Libcall LC =
+      (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
+  CallingConv::ID CC = getLibcallCallingConv(LC);
   SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
 
   TargetLowering::CallLoweringInfo CLI(DAG);
-  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
-    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
-               std::move(Args), 0)
-    .setDiscardResult();
-
+  CLI.setDebugLoc(dl)
+      .setChain(DAG.getEntryNode())
+      .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+      .setDiscardResult(ShouldUseSRet);
   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
 
+  if (!ShouldUseSRet)
+    return CallResult.first;
+
   SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
                                 MachinePointerInfo(), false, false, false, 0);
@@ -6622,6 +6773,85 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
                      LoadSin.getValue(0), LoadCos.getValue(0));
 }
 
+SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
+                                                  bool Signed,
+                                                  SDValue &Chain) const {
+  EVT VT = Op.getValueType();
+  assert((VT == MVT::i32 || VT == MVT::i64) &&
+         "unexpected type for custom lowering DIV");
+  SDLoc dl(Op);
+
+  const auto &DL = DAG.getDataLayout();
+  const auto &TLI = DAG.getTargetLoweringInfo();
+
+  const char *Name = nullptr;
+  if (Signed)
+    Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
+  else
+    Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
+
+  SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
+
+  ARMTargetLowering::ArgListTy Args;
+
+  for (auto AI : {1, 0}) {
+    ArgListEntry Arg;
+    Arg.Node = Op.getOperand(AI);
+    Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
+    Args.push_back(Arg);
+  }
+
+  CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl)
+      .setChain(Chain)
+      .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
+                 ES, std::move(Args), 0);
+
+  return LowerCallTo(CLI).first;
+}
+
+SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
+                                            bool Signed) const {
+  assert(Op.getValueType() == MVT::i32 &&
+         "unexpected type for custom lowering DIV");
+  SDLoc dl(Op);
+
+  SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
+                               DAG.getEntryNode(), Op.getOperand(1));
+
+  return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+}
+
+void ARMTargetLowering::ExpandDIV_Windows(
+    SDValue Op, SelectionDAG &DAG, bool Signed,
+    SmallVectorImpl<SDValue> &Results) const {
+  const auto &DL = DAG.getDataLayout();
+  const auto &TLI = DAG.getTargetLoweringInfo();
+
+  assert(Op.getValueType() == MVT::i64 &&
+         "unexpected type for custom lowering DIV");
+  SDLoc dl(Op);
+
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+                           DAG.getConstant(0, dl, MVT::i32));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+                           DAG.getConstant(1, dl, MVT::i32));
+  SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi);
+
+  SDValue DBZCHK =
+      DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or);
+
+  SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+
+  SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
+  SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
                              DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
+  Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
+
+  Results.push_back(Lower);
+  Results.push_back(Upper);
+}
+
 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
   // Monotonic load/store is legal for all targets
   if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@@ -6637,36 +6867,22 @@ static void
 ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
                         SelectionDAG &DAG, const ARMSubtarget *Subtarget) {
   SDLoc DL(N);
-  SDValue Cycles32, OutChain;
-
-  if (Subtarget->hasPerfMon()) {
-    // Under Power Management extensions, the cycle-count is:
-    //   mrc p15, #0, <Rt>, c9, c13, #0
-    SDValue Ops[] = { N->getOperand(0), // Chain
-                      DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
-                      DAG.getConstant(15, DL, MVT::i32),
-                      DAG.getConstant(0, DL, MVT::i32),
-                      DAG.getConstant(9, DL, MVT::i32),
-                      DAG.getConstant(13, DL, MVT::i32),
-                      DAG.getConstant(0, DL, MVT::i32)
-    };
-
-    Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
-                           DAG.getVTList(MVT::i32, MVT::Other), Ops);
-    OutChain = Cycles32.getValue(1);
-  } else {
-    // Intrinsic is defined to return 0 on unsupported platforms. Technically
-    // there are older ARM CPUs that have implementation-specific ways of
-    // obtaining this information (FIXME!).
-    Cycles32 = DAG.getConstant(0, DL, MVT::i32);
-    OutChain = DAG.getEntryNode();
-  }
-
+  // Under Power Management extensions, the cycle-count is:
+  //   mrc p15, #0, <Rt>, c9, c13, #0
+  SDValue Ops[] = { N->getOperand(0), // Chain
+                    DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+                    DAG.getConstant(15, DL, MVT::i32),
+                    DAG.getConstant(0, DL, MVT::i32),
+                    DAG.getConstant(9, DL, MVT::i32),
+                    DAG.getConstant(13, DL, MVT::i32),
+                    DAG.getConstant(0, DL, MVT::i32)
+  };
 
-  SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
-                                 Cycles32, DAG.getConstant(0, DL, MVT::i32));
-  Results.push_back(Cycles64);
-  Results.push_back(OutChain);
+  SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+                                 DAG.getVTList(MVT::i32, MVT::Other), Ops);
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
+                                DAG.getConstant(0, DL, MVT::i32)));
+  Results.push_back(Cycles32.getValue(1));
 }
 
 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -6700,7 +6916,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
   case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
-  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
   case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
@@ -6710,6 +6925,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SHL:
   case ISD::SRL:
   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
+  case ISD::SREM:          return LowerREM(Op.getNode(), DAG);
+  case ISD::UREM:          return LowerREM(Op.getNode(), DAG);
   case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
   case ISD::SRL_PARTS:
   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
@@ -6747,13 +6964,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     llvm_unreachable("Don't know how to custom lower this!");
   case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
   case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+  case ARMISD::WIN__DBZCHK: return SDValue();
   }
 }
 
 /// ReplaceNodeResults - Replace the results of node with an illegal result
 /// type with new values built out of custom code.
 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
-                                           SmallVectorImpl<SDValue>&Results,
+                                           SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
   SDValue Res;
   switch (N->getOpcode()) {
@@ -6769,9 +6987,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::SRA:
     Res = Expand64BitShift(N, DAG, Subtarget);
     break;
+  case ISD::SREM:
+  case ISD::UREM:
+    Res = LowerREM(N, DAG);
+    break;
   case ISD::READCYCLECOUNTER:
     ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
     return;
+  case ISD::UDIV:
+  case ISD::SDIV:
+    assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
+    return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
+                             Results);
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -6917,7 +7144,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
   MachineModuleInfo &MMI = MF->getMMI();
   for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
        ++BB) {
-    if (!BB->isLandingPad()) continue;
+    if (!BB->isEHPad()) continue;
 
     // FIXME: We should assert that the EH_LABEL is the first MI in the landing
     // pad.
@@ -6932,7 +7159,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
       for (SmallVectorImpl<unsigned>::iterator
              CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
           CSI != CSE; ++CSI) {
-        CallSiteNumToLPad[*CSI].push_back(BB);
+        CallSiteNumToLPad[*CSI].push_back(&*BB);
         MaxCSNum = std::max(MaxCSNum, *CSI);
       }
       break;
@@ -6965,7 +7192,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
   // Shove the dispatch's address into the return slot in the function context.
   MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
-  DispatchBB->setIsLandingPad();
+  DispatchBB->setIsEHPad();
 
   MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
   unsigned trap_opcode;
@@ -7231,13 +7458,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
                                                  BB->succ_end());
     while (!Successors.empty()) {
       MachineBasicBlock *SMBB = Successors.pop_back_val();
-      if (SMBB->isLandingPad()) {
+      if (SMBB->isEHPad()) {
        BB->removeSuccessor(SMBB);
        MBBLPads.push_back(SMBB);
       }
     }
 
-    BB->addSuccessor(DispatchBB);
+    BB->addSuccessor(DispatchBB, BranchProbability::getZero());
+    BB->normalizeSuccProbs();
 
     // Find the invoke call and mark all of the callee-saved registers as
     // 'implicit defined' so that they're spilled.  This prevents code from
@@ -7279,7 +7507,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
   // landing pad now.
   for (SmallVectorImpl<MachineBasicBlock*>::iterator
          I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
-    (*I)->setIsLandingPad(false);
+    (*I)->setIsEHPad(false);
 
   // The instruction is gone now.
   MI->eraseFromParent();
@@ -7402,8 +7630,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
   // Otherwise, we will generate unrolled scalar copies.
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction::iterator It = BB;
-  ++It;
+  MachineFunction::iterator It = ++BB->getIterator();
 
   unsigned dest = MI->getOperand(0).getReg();
   unsigned src = MI->getOperand(1).getReg();
@@ -7695,6 +7922,32 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
   return MBB;
 }
 
+MachineBasicBlock *
+ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
+                                       MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI->getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+
+  MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
+  MF->push_back(ContBB);
+  ContBB->splice(ContBB->begin(), MBB,
+                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  MBB->addSuccessor(ContBB);
+
+  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+  MF->push_back(TrapBB);
+  BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249);
+  MBB->addSuccessor(TrapBB);
+
+  BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
+      .addReg(MI->getOperand(0).getReg())
+      .addMBB(TrapBB);
+
+  MI->eraseFromParent();
+  return ContBB;
+}
+
 MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
@@ -7765,8 +8018,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     // destination vreg to set, the condition code register to branch on, the
     // true/false values to select between, and a branch opcode to use.
     const BasicBlock *LLVM_BB = BB->getBasicBlock();
-    MachineFunction::iterator It = BB;
-    ++It;
+    MachineFunction::iterator It = ++BB->getIterator();
 
     //  thisMBB:
     //  ...
@@ -7884,8 +8136,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     //   RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
     //   SinkBB: V1 = PHI(V2, V3)
     const BasicBlock *LLVM_BB = BB->getBasicBlock();
-    MachineFunction::iterator BBI = BB;
-    ++BBI;
+    MachineFunction::iterator BBI = ++BB->getIterator();
     MachineFunction *Fn = BB->getParent();
     MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
     MachineBasicBlock *SinkBB  = Fn->CreateMachineBasicBlock(LLVM_BB);
@@ -7949,11 +8200,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return EmitStructByval(MI, BB);
   case ARM::WIN__CHKSTK:
     return EmitLowered__chkstk(MI, BB);
+  case ARM::WIN__DBZCHK:
+    return EmitLowered__dbzchk(MI, BB);
   }
 }
 
+/// \brief Attaches vregs to MEMCPY that it will use as scratch registers
+/// when it is expanded into LDM/STM. This is done as a post-isel lowering
+/// instead of as a custom inserter because we need the use list from the SDNode.
+static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
+                                    MachineInstr *MI, const SDNode *Node) {
+  bool isThumb1 = Subtarget->isThumb1Only();
+
+  DebugLoc DL = MI->getDebugLoc();
+  MachineFunction *MF = MI->getParent()->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineInstrBuilder MIB(*MF, MI);
+
+  // If the new dst/src is unused mark it as dead.
+  if (!Node->hasAnyUseOfValue(0)) {
+    MI->getOperand(0).setIsDead(true);
+  }
+  if (!Node->hasAnyUseOfValue(1)) {
+    MI->getOperand(1).setIsDead(true);
+  }
+
+  // The MEMCPY both defines and kills the scratch registers.
+  for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+    unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
+                                                         : &ARM::GPRRegClass);
+    MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
+  }
+}
+
 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                       SDNode *Node) const {
+  if (MI->getOpcode() == ARM::MEMCPY) {
+    attachMEMCPYScratchRegs(Subtarget, MI, Node);
+    return;
+  }
+
   const MCInstrDesc *MCID = &MI->getDesc();
   // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
   // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -8023,10 +8309,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
 
 // Helper function that checks if N is a null or all ones constant.
 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
-  if (!C)
-    return false;
-  return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+  return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
 }
 
 // Return true if N is conditionally 0 or all ones.
@@ -8848,12 +9131,88 @@ static SDValue PerformXORCombine(SDNode *N,
   return SDValue();
 }
 
-/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
-/// the bits being cleared by the AND are not demanded by the BFI.
+// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
+// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
+// their position in "to" (Rd).
+static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
+  assert(N->getOpcode() == ARMISD::BFI);
+
+  SDValue From = N->getOperand(1);
+  ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+  FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
+
+  // If the Base came from a SHR #C, we can deduce that it is really testing bit
+  // #C in the base of the SHR.
+  if (From->getOpcode() == ISD::SRL &&
+      isa<ConstantSDNode>(From->getOperand(1))) {
+    APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+    assert(Shift.getLimitedValue() < 32 && "Shift too large!");
+    FromMask <<= Shift.getLimitedValue(31);
+    From = From->getOperand(0);
+  }
+
+  return From;
+}
+
+// If A and B contain one contiguous set of bits, does A | B == A . B?
+//
+// Neither A nor B must be zero.
+static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
+  unsigned LastActiveBitInA  =  A.countTrailingZeros();
+  unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
+  return LastActiveBitInA - 1 == FirstActiveBitInB;
+}
+
+static SDValue FindBFIToCombineWith(SDNode *N) {
+  // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
+  // if one exists.
+  APInt ToMask, FromMask;
+  SDValue From = ParseBFI(N, ToMask, FromMask);
+  SDValue To = N->getOperand(0);
+
+  // Now check for a compatible BFI to merge with. We can pass through BFIs that
+  // aren't compatible, but not if they set the same bit in their destination as
+  // we do (or that of any BFI we're going to combine with).
+  SDValue V = To;
+  APInt CombinedToMask = ToMask;
+  while (V.getOpcode() == ARMISD::BFI) {
+    APInt NewToMask, NewFromMask;
+    SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
+    if (NewFrom != From) {
+      // This BFI has a different base. Keep going.
+      CombinedToMask |= NewToMask;
+      V = V.getOperand(0);
+      continue;
+    }
+
+    // Do the written bits conflict with any we've seen so far?
+    if ((NewToMask & CombinedToMask).getBoolValue())
+      // Conflicting bits - bail out because going further is unsafe.
+      return SDValue();
+
+    // Are the new bits contiguous when combined with the old bits?
+    if (BitsProperlyConcatenate(ToMask, NewToMask) &&
+        BitsProperlyConcatenate(FromMask, NewFromMask))
+      return V;
+    if (BitsProperlyConcatenate(NewToMask, ToMask) &&
+        BitsProperlyConcatenate(NewFromMask, FromMask))
+      return V;
+
+    // We've seen a write to some bits, so track it.
+    CombinedToMask |= NewToMask;
+    // Keep going...
+    V = V.getOperand(0);
+  }
+
+  return SDValue();
+}
+
 static SDValue PerformBFICombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
   SDValue N1 = N->getOperand(1);
   if (N1.getOpcode() == ISD::AND) {
+    // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+    // the bits being cleared by the AND are not demanded by the BFI.
     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
     if (!N11C)
       return SDValue();
@@ -8869,6 +9228,38 @@ static SDValue PerformBFICombine(SDNode *N,
     return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
                            N->getOperand(0), N1.getOperand(0),
                            N->getOperand(2));
+  } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
+    // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
+    // Keep track of any consecutive bits set that all come from the same base
+    // value. We can combine these together into a single BFI.
+    SDValue CombineBFI = FindBFIToCombineWith(N);
+    if (CombineBFI == SDValue())
+      return SDValue();
+
+    // We've found a BFI.
+    APInt ToMask1, FromMask1;
+    SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
+
+    APInt ToMask2, FromMask2;
+    SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
+    assert(From1 == From2);
+    (void)From2;
+
+    // First, unlink CombineBFI.
+    DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
+    // Then create a new BFI, combining the two together.
+    APInt NewFromMask = FromMask1 | FromMask2;
+    APInt NewToMask = ToMask1 | ToMask2;
+
+    EVT VT = N->getValueType(0);
+    SDLoc dl(N);
+
+    if (NewFromMask[0] == 0)
+      From1 = DCI.DAG.getNode(
+          ISD::SRL, dl, VT, From1,
+          DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
+    return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
+                           DCI.DAG.getConstant(~NewToMask, dl, VT));
   }
   return SDValue();
 }
@@ -9646,32 +10037,6 @@ static SDValue PerformSTORECombine(SDNode *N,
   return SDValue();
 }
 
-// isConstVecPow2 - Return true if each vector element is a power of 2, all
-// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
-static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
-{
-  integerPart cN;
-  integerPart c0 = 0;
-  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
-       I != E; I++) {
-    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
-    if (!C)
-      return false;
-
-    bool isExact;
-    APFloat APF = C->getValueAPF();
-    if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
-        != APFloat::opOK || !isExact)
-      return false;
-
-    c0 = (I == 0) ? cN : c0;
-    if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
-      return false;
-  }
-  C = c0;
-  return true;
-}
-
 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
 /// can replace combinations of VMUL and VCVT (floating-point to integer)
 /// when the VMUL has a constant operand that is a power of 2.
@@ -9681,30 +10046,25 @@ static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
 ///  vcvt.s32.f32    d16, d16
 /// becomes:
 ///  vcvt.s32.f32    d16, d16, #3
-static SDValue PerformVCVTCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *Subtarget) {
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue Op = N->getOperand(0);
+  if (!Subtarget->hasNEON())
+    return SDValue();
 
-  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
-      Op.getOpcode() != ISD::FMUL)
+  SDValue Op = N->getOperand(0);
+  if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
     return SDValue();
 
-  uint64_t C;
-  SDValue N0 = Op->getOperand(0);
   SDValue ConstVec = Op->getOperand(1);
-  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
-
-  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
-      !isConstVecPow2(ConstVec, isSigned, C))
+  if (!isa<BuildVectorSDNode>(ConstVec))
     return SDValue();
 
   MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+  uint32_t FloatBits = FloatTy.getSizeInBits();
   MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+  uint32_t IntBits = IntTy.getSizeInBits();
   unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
-      NumLanes > 4) {
+  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
     // These instructions only exist converting from f32 to i32. We can handle
     // smaller integers by generating an extra truncate, but larger ones would
     // be lossy. We also can't handle more then 4 lanes, since these intructions
@@ -9712,16 +10072,22 @@ static SDValue PerformVCVTCombine(SDNode *N,
     return SDValue();
   }
 
+  BitVector UndefElements;
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+  if (C == -1 || C == 0 || C > 32)
+    return SDValue();
+
   SDLoc dl(N);
+  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
     Intrinsic::arm_neon_vcvtfp2fxu;
-  SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
-                                NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
-                                DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
-                                N0,
-                                DAG.getConstant(Log2_64(C), dl, MVT::i32));
+  SDValue FixConv = DAG.getNode(
+      ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
+      DAG.getConstant(C, dl, MVT::i32));
 
-  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+  if (IntBits < FloatBits)
     FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
 
   return FixConv;
@@ -9736,38 +10102,44 @@ static SDValue PerformVCVTCombine(SDNode *N,
 ///  vdiv.f32        d16, d17, d16
 /// becomes:
 ///  vcvt.f32.s32    d16, d16, #3
-static SDValue PerformVDIVCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *Subtarget) {
-  SelectionDAG &DAG = DCI.DAG;
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
   SDValue Op = N->getOperand(0);
   unsigned OpOpcode = Op.getNode()->getOpcode();
-
-  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+  if (!N->getValueType(0).isVector() ||
       (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
     return SDValue();
 
-  uint64_t C;
   SDValue ConstVec = N->getOperand(1);
-  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
-
-  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
-      !isConstVecPow2(ConstVec, isSigned, C))
+  if (!isa<BuildVectorSDNode>(ConstVec))
     return SDValue();
 
   MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+  uint32_t FloatBits = FloatTy.getSizeInBits();
   MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
-  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+  uint32_t IntBits = IntTy.getSizeInBits();
+  unsigned NumLanes = Op.getValueType().getVectorNumElements();
+  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
     // These instructions only exist converting from i32 to f32. We can handle
     // smaller integers by generating an extra extend, but larger ones would
-    // be lossy.
+    // be lossy. We also can't handle more then 4 lanes, since these intructions
+    // only support v2i32/v4i32 types.
     return SDValue();
   }
 
+  BitVector UndefElements;
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+  if (C == -1 || C == 0 || C > 32)
+    return SDValue();
+
   SDLoc dl(N);
+  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
   SDValue ConvInput = Op.getOperand(0);
-  unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+  if (IntBits < FloatBits)
     ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                             dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
                             ConvInput);
@@ -9777,7 +10149,7 @@ static SDValue PerformVDIVCombine(SDNode *N,
 
   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
                      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
-                     ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32));
+                     ConvInput, DAG.getConstant(C, dl, MVT::i32));
 }
 
 /// Getvshiftimm - Check if this is a valid build_vector for the immediate
@@ -9840,15 +10212,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
     // Don't do anything for most intrinsics.
     break;
 
-  case Intrinsic::arm_neon_vabds:
-    if (!N->getValueType(0).isInteger())
-      return SDValue();
-    return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2));
-  case Intrinsic::arm_neon_vabdu:
-    return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2));
-
   // Vector shifts: check for immediate versions and lower them.
 /// Getvshiftimm - Check if this is a valid build_vector for the immediate
@@ -9840,15 +10212,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
     // Don't do anything for most intrinsics.
     break;
 
-  case Intrinsic::arm_neon_vabds:
-    if (!N->getValueType(0).isInteger())
-      return SDValue();
-    return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2));
-  case Intrinsic::arm_neon_vabdu:
-    return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2));
-
   // Vector shifts: check for immediate versions and lower them.
   // Note: This is done during DAG combining instead of DAG legalizing because
   // the build_vectors for 64-bit vector element shift counts are generally
@@ -10077,6 +10440,125 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
+                             APInt &KnownOne) {
+  if (Op.getOpcode() == ARMISD::BFI) {
+    // Conservatively, we can recurse down the first operand
+    // and just mask out all affected bits.
+    computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
+
+    // The operand to BFI is already a mask suitable for removing the bits it
+    // sets.
+    ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+    APInt Mask = CI->getAPIntValue();
+    KnownZero &= Mask;
+    KnownOne &= Mask;
+    return;
+  }
+  if (Op.getOpcode() == ARMISD::CMOV) {
+    APInt KZ2(KnownZero.getBitWidth(), 0);
+    APInt KO2(KnownOne.getBitWidth(), 0);
+    computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
+    computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
+
+    KnownZero &= KZ2;
+    KnownOne &= KO2;
+    return;
+  }
+  return DAG.computeKnownBits(Op, KnownZero, KnownOne);
+}
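
The CMOV case above is the usual conservative meet: a bit is known only if both
arms agree on it. A self-contained sketch of that merge on plain masks, with
illustrative names rather than the LLVM APInt types:

#include <cassert>
#include <cstdint>

// Known-bits facts for a 32-bit value: Zero and One are disjoint masks.
struct KnownBits32 {
  uint32_t Zero, One;
};

// Merge facts across a two-way CMOV: keep only what both arms agree on.
static KnownBits32 mergeCMOV(KnownBits32 A, KnownBits32 B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  KnownBits32 TrueArm  = {0xFFFFFF00u, 0x000000FFu}; // fully known: 0x000000FF
  KnownBits32 FalseArm = {0xFFFFFFF0u, 0x0000000Fu}; // fully known: 0x0000000F
  KnownBits32 M = mergeCMOV(TrueArm, FalseArm);
  // Bits 4..7 disagree between the arms, so they become unknown either way.
  assert(M.Zero == 0xFFFFFF00u && M.One == 0x0000000Fu);
}
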
 
+SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
+  // If we have a CMOV, OR and AND combination such as:
+  //   if (x & CN)
+  //     y |= CM;
+  //
+  // And:
+  //   * CN is a single bit;
+  //   * All bits covered by CM are known zero in y
+  //
+  // Then we can convert this into a sequence of BFI instructions. This will
+  // always be a win if CM is a single bit, will always be no worse than the
+  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
+  // three bits (due to the extra IT instruction).
+
+  SDValue Op0 = CMOV->getOperand(0);
+  SDValue Op1 = CMOV->getOperand(1);
+  auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
+  auto CC = CCNode->getAPIntValue().getLimitedValue();
+  SDValue CmpZ = CMOV->getOperand(4);
+
+  // The compare must be against zero.
+  if (!isNullConstant(CmpZ->getOperand(1)))
+    return SDValue();
+
+  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
+  SDValue And = CmpZ->getOperand(0);
+  if (And->getOpcode() != ISD::AND)
+    return SDValue();
+  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
+  if (!AndC || !AndC->getAPIntValue().isPowerOf2())
+    return SDValue();
+  SDValue X = And->getOperand(0);
+
+  if (CC == ARMCC::EQ) {
+    // We're performing an "equal to zero" compare. Swap the operands so we
+    // canonicalize on a "not equal to zero" compare.
+    std::swap(Op0, Op1);
+  } else {
+    assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
+  }
+
+  if (Op1->getOpcode() != ISD::OR)
+    return SDValue();
+
+  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
+  if (!OrC)
+    return SDValue();
+  SDValue Y = Op1->getOperand(0);
+
+  if (Op0 != Y)
+    return SDValue();
+
+  // Now, is it profitable to continue?
+  APInt OrCI = OrC->getAPIntValue();
+  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
+  if (OrCI.countPopulation() > Heuristic)
+    return SDValue();
+
+  // Lastly, can we determine that the bits defined by OrCI
+  // are zero in Y?
+  APInt KnownZero, KnownOne;
+  computeKnownBits(DAG, Y, KnownZero, KnownOne);
+  if ((OrCI & KnownZero) != OrCI)
+    return SDValue();
+
+  // OK, we can do the combine.
+  SDValue V = Y;
+  SDLoc dl(X);
+  EVT VT = X.getValueType();
+  unsigned BitInX = AndC->getAPIntValue().logBase2();
+
+  if (BitInX != 0) {
+    // We must shift X first.
+    X = DAG.getNode(ISD::SRL, dl, VT, X,
+                    DAG.getConstant(BitInX, dl, VT));
+  }
+
+  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
+       BitInY < NumActiveBits; ++BitInY) {
+    if (OrCI[BitInY] == 0)
+      continue;
+    APInt Mask(VT.getSizeInBits(), 0);
+    Mask.setBit(BitInY);
+    V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
+                    // Confusingly, the operand is an *inverted* mask.
+                    DAG.getConstant(~Mask, dl, VT));
+  }
+
+  return V;
+}
+
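
A worked example of the rewrite, in scalar form: for "if (x & 0x10) y |= 3;"
the tested bit is shifted down to bit 0 and then inserted once per set bit of
CM. The helpers below are illustrative models of the emitted BFI chain, not
code from the patch:

#include <cassert>
#include <cstdint>

// One BFI step: copy bit 0 of X into bit BitInY of V.
static uint32_t insertBit(uint32_t V, uint32_t X, unsigned BitInY) {
  uint32_t Mask = 1u << BitInY;
  return (V & ~Mask) | ((X & 1u) << BitInY);
}

// Model of the full rewrite for CN = 0x10, CM = 0x3 (so BitInX = 4).
static uint32_t cmovToBFI(uint32_t X, uint32_t Y) {
  X >>= 4;                 // shift the tested bit down to bit 0
  Y = insertBit(Y, X, 0);  // one BFI per set bit in CM
  Y = insertBit(Y, X, 1);
  return Y;
}

int main() {
  uint32_t Tests[] = {0u, 0x10u, 0xFFu};
  for (uint32_t X : Tests) {
    uint32_t Y = 0xABCD0100u; // bits covered by CM are already zero in Y
    uint32_t Ref = (X & 0x10u) ? (Y | 0x3u) : Y;
    assert(cmovToBFI(X, Y) == Ref);
  }
}
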
 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
 SDValue
 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
@@ -10095,6 +10577,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
   ARMCC::CondCodes CC =
     (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
 
+  // BFI is only available on V6T2+.
+  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
+    SDValue R = PerformCMOVToBFICombine(N, DAG);
+    if (R)
+      return R;
+  }
+
   // Simplify
   //   mov     r1, r0
   //   cmp     r1, x
@@ -10161,8 +10650,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
   case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
-  case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
+  case ISD::FP_TO_UINT:
+    return PerformVCVTCombine(N, DCI.DAG, Subtarget);
+  case ISD::FDIV:
+    return PerformVDIVCombine(N, DCI.DAG, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
   case ISD::SHL:
   case ISD::SRA:
@@ -10984,7 +11475,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
         return;
 
       case 'J':
-        if (Subtarget->isThumb()) {  // FIXME thumb2
+        if (Subtarget->isThumb1Only()) {
           // This must be a constant between -255 and -1, for negated ADD
           // immediates. This can be used in GCC with an "n" modifier that
           // prints the negated value, for use with SUB instructions. It is
@@ -11053,7 +11544,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
         return;
 
      case 'M':
-        if (Subtarget->isThumb()) { // FIXME thumb2
+        if (Subtarget->isThumb1Only()) {
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
@@ -11095,38 +11586,61 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
-SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
-  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
-         "Register-based DivRem lowering only");
-  unsigned Opcode = Op->getOpcode();
-  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
-         "Invalid opcode for Div/Rem lowering");
-  bool isSigned = (Opcode == ISD::SDIVREM);
-  EVT VT = Op->getValueType(0);
-  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-
+static RTLIB::Libcall getDivRemLibcall(
+    const SDNode *N, MVT::SimpleValueType SVT) {
+  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+          N->getOpcode() == ISD::SREM    || N->getOpcode() == ISD::UREM) &&
+         "Unhandled Opcode in getDivRemLibcall");
+  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+                  N->getOpcode() == ISD::SREM;
   RTLIB::Libcall LC;
-  switch (VT.getSimpleVT().SimpleTy) {
+  switch (SVT) {
   default: llvm_unreachable("Unexpected request for libcall!");
   case MVT::i8:  LC = isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
   case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
   case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
   }
+  return LC;
+}
 
-  SDValue InChain = DAG.getEntryNode();
-
+static TargetLowering::ArgListTy getDivRemArgList(
+    const SDNode *N, LLVMContext *Context) {
+  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+          N->getOpcode() == ISD::SREM    || N->getOpcode() == ISD::UREM) &&
+         "Unhandled Opcode in getDivRemArgList");
+  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+                  N->getOpcode() == ISD::SREM;
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
-    EVT ArgVT = Op->getOperand(i).getValueType();
-    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Op->getOperand(i);
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = N->getOperand(i).getValueType();
+    Type *ArgTy = ArgVT.getTypeForEVT(*Context);
+    Entry.Node = N->getOperand(i);
     Entry.Ty = ArgTy;
     Entry.isSExt = isSigned;
     Entry.isZExt = !isSigned;
     Args.push_back(Entry);
   }
+  return Args;
+}
+
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
+         "Register-based DivRem lowering only");
+  unsigned Opcode = Op->getOpcode();
+  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+         "Invalid opcode for Div/Rem lowering");
+  bool isSigned = (Opcode == ISD::SDIVREM);
+  EVT VT = Op->getValueType(0);
+  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
+                                       VT.getSimpleVT().SimpleTy);
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
+                                                    DAG.getContext());
 
   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                          getPointerTy(DAG.getDataLayout()));
@@ -11143,6 +11657,47 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
   return CallInfo.first;
 }
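
Both LowerDivRem and the LowerREM path that follows call an RTABI divmod
helper such as __aeabi_idivmod, which computes quotient and remainder in one
call and returns them as a pair (under AAPCS, in r0 and r1). A sketch of that
contract; the struct return is an illustrative stand-in for the two-register
result:

#include <cassert>
#include <cstdint>

// Model of the __aeabi_idivmod contract: one call yields both results.
// The real helper returns the pair in r0 (quotient) and r1 (remainder).
struct IDivMod {
  int32_t Quot, Rem;
};

static IDivMod aeabi_idivmod_model(int32_t Num, int32_t Den) {
  return {Num / Den, Num % Den};
}

int main() {
  // ISD::SDIVREM consumes both fields; LowerREM keeps only .Rem (the second
  // result operand) and lets the DAG discard the quotient.
  IDivMod R = aeabi_idivmod_model(-7, 3);
  assert(R.Quot == -2 && R.Rem == -1);
}
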
 
+// Lowers REM using divmod helpers
+// see RTABI section 4.2/4.3
+SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
+  // Build return types (div and rem)
+  std::vector<Type*> RetTyParams;
+  Type *RetTyElement;
+
+  switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::i8:  RetTyElement = Type::getInt8Ty(*DAG.getContext());  break;
+  case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
+  case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
+  case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
+  }
+
+  RetTyParams.push_back(RetTyElement);
+  RetTyParams.push_back(RetTyElement);
+  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
+  Type *RetTy = StructType::get(*DAG.getContext(), ret);
+
+  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
+                                          SimpleTy);
+  SDValue InChain = DAG.getEntryNode();
+  TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+  bool isSigned = N->getOpcode() == ISD::SREM;
+  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+                                         getPointerTy(DAG.getDataLayout()));
+
+  // Lower call
+  CallLoweringInfo CLI(DAG);
+  CLI.setChain(InChain)
+     .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+     .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+  // Return second (rem) result operand (first contains div)
+  SDNode *ResNode = CallResult.first.getNode();
+  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
+  return ResNode->getOperand(1);
+}
+
 SDValue
 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "unsupported target platform");
@@ -11177,8 +11732,8 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
     LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
 
   SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, SDLoc(Op)).first;
+  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+                     SDLoc(Op)).first;
 }
 
 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -11190,8 +11745,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
     LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
 
   SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, SDLoc(Op)).first;
+  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+                     SDLoc(Op)).first;
 }
 
 bool
@@ -11239,7 +11794,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     // Conservatively set memVT to the entire set of vectors loaded.
     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
-    uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
@@ -11265,7 +11820,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       Type *ArgTy = I.getArgOperand(ArgI)->getType();
       if (!ArgTy->isVectorTy())
        break;
-      NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
     }
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(0);
@@ -11348,8 +11903,6 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
   return true;
 }
 
-bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
-
 Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
                                         ARM_MB::MemBOpt Domain) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11445,19 +11998,26 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
 // guarantee, see DDI0406C ARM architecture reference manual,
 // sections A8.8.72-74 LDRD)
-bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return (Size == 64) && !Subtarget->isMClass();
+  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
+                                                  : AtomicExpansionKind::None;
 }
 
 // For the real atomic operations, we have ldrex/strex up to 32 bits,
 // and up to 64 bits on the non-M profiles
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   return (Size <= (Subtarget->isMClass() ? 32U : 64U))
-             ? AtomicRMWExpansionKind::LLSC
-             : AtomicRMWExpansionKind::None;
+             ? AtomicExpansionKind::LLSC
+             : AtomicExpansionKind::None;
+}
+
+bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *AI) const {
+  return true;
 }
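
Returning AtomicExpansionKind::LLSC tells AtomicExpandPass to rewrite an
atomicrmw into a load-linked/store-conditional retry loop (ldrex/strex on
ARM). A portable sketch of the shape of that loop; compare_exchange_weak
stands in for strex here since it may likewise fail spuriously and force a
retry:

#include <atomic>
#include <cassert>

// Shape of the loop emitted for "atomicrmw add" under the LLSC expansion.
static int atomicAddLLSC(std::atomic<int> &A, int V) {
  int Old = A.load(std::memory_order_relaxed);    // ldrex
  while (!A.compare_exchange_weak(Old, Old + V))  // strex, retry on failure
    ;
  return Old;
}

int main() {
  std::atomic<int> A{40};
  assert(atomicAddLLSC(A, 2) == 40 && A.load() == 42);
}
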
 
 // This has so far only been implemented for MachO.
@@ -11494,6 +12054,14 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
   return false;
 }
 
+bool ARMTargetLowering::isCheapToSpeculateCttz() const {
+  return Subtarget->hasV6T2Ops();
+}
+
+bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
+  return Subtarget->hasV6T2Ops();
+}
+
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11530,6 +12098,14 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                          cast<PointerType>(Addr->getType())->getElementType());
 }
 
+void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+    IRBuilder<> &Builder) const {
+  if (!Subtarget->hasV7Ops())
+    return;
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+}
+
 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                                Value *Addr,
                                                AtomicOrdering Ord) const {
@@ -11587,12 +12163,12 @@ bool ARMTargetLowering::lowerInterleavedLoad(
   Type *EltTy = VecTy->getVectorElementType();
 
   const DataLayout &DL = LI->getModule()->getDataLayout();
-  unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
-  bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+  bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
 
-  // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
-  // support i64/f64 element).
-  if ((VecSize != 64 && VecSize != 128) || EltIs64Bits)
+  // Skip if we do not have NEON and skip illegal vector types and vector types
+  // with i64/f64 elements (vldN doesn't support i64/f64 elements).
+  if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
     return false;
 
   // A pointer vector cannot be the return type of the ldN intrinsics. Need to
@@ -11605,9 +12181,6 @@ bool ARMTargetLowering::lowerInterleavedLoad(
                                             Intrinsic::arm_neon_vld3,
                                             Intrinsic::arm_neon_vld4};
 
-  Function *VldnFunc =
-      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy);
-
   IRBuilder<> Builder(LI);
   SmallVector<Value *, 2> Ops;
 
@@ -11615,6 +12188,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
   Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
   Ops.push_back(Builder.getInt32(LI->getAlignment()));
 
+  Type *Tys[] = { VecTy, Int8Ptr };
+  Function *VldnFunc =
+      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
   CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
 
   // Replace uses of each shufflevector with the corresponding vector loaded
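
For orientation, a factor-2 interleaved load in IR is a wide load followed by
two strided shufflevectors, which this hook replaces with one arm.neon.vld2
call. A scalar model of what that vld2 produces; the names below are
illustrative only:

#include <array>
#include <cassert>
#include <cstdint>

// Model of @llvm.arm.neon.vld2 for v4i32: one interleaved block split into
// even- and odd-indexed lanes, replacing load + shufflevector <0,2,4,6> and
// shufflevector <1,3,5,7>.
struct VLD2Result {
  std::array<int32_t, 4> Even, Odd;
};

static VLD2Result vld2Model(const int32_t *Ptr) {
  VLD2Result R;
  for (int I = 0; I < 4; ++I) {
    R.Even[I] = Ptr[2 * I];
    R.Odd[I]  = Ptr[2 * I + 1];
  }
  return R;
}

int main() {
  int32_t Data[8] = {0, 10, 1, 11, 2, 12, 3, 13};
  VLD2Result R = vld2Model(Data);
  assert(R.Even[2] == 2 && R.Odd[2] == 12);
}
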
@@ -11677,12 +12253,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
   VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
 
   const DataLayout &DL = SI->getModule()->getDataLayout();
-  unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
-  bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+  unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+  bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
 
-  // Skip illegal sub vector types and vector types of i64/f64 element (vstN
-  // doesn't support i64/f64 element).
-  if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits)
+  // Skip if we do not have NEON and skip illegal vector types and vector types
+  // with i64/f64 elements (vstN doesn't support i64/f64 elements).
+  if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
+      EltIs64Bits)
     return false;
 
   Value *Op0 = SVI->getOperand(0);
@@ -11703,17 +12280,18 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
     SubVecTy = VectorType::get(IntTy, NumSubElts);
   }
 
-  static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
-                                       Intrinsic::arm_neon_vst3,
-                                       Intrinsic::arm_neon_vst4};
-  Function *VstNFunc = Intrinsic::getDeclaration(
-      SI->getModule(), StoreInts[Factor - 2], SubVecTy);
-
+  static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+                                             Intrinsic::arm_neon_vst3,
+                                             Intrinsic::arm_neon_vst4};
   SmallVector<Value *, 6> Ops;
 
   Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
   Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
 
+  Type *Tys[] = { Int8Ptr, SubVecTy };
+  Function *VstNFunc = Intrinsic::getDeclaration(
+      SI->getModule(), StoreInts[Factor - 2], Tys);
+
   // Split the shufflevector operands into sub vectors for the new vstN call.
   for (unsigned i = 0; i < Factor; i++)
     Ops.push_back(Builder.CreateShuffleVector(
@@ -11800,3 +12378,17 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
   bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
   return IsHA || IsIntArray;
 }
+
+unsigned ARMTargetLowering::getExceptionPointerRegister(
+    const Constant *PersonalityFn) const {
+  // Platforms which do not use SjLj EH may return values in these registers
+  // via the personality function.
+  return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
+}
+
+unsigned ARMTargetLowering::getExceptionSelectorRegister(
+    const Constant *PersonalityFn) const {
+  // Platforms which do not use SjLj EH may return values in these registers
+  // via the personality function.
+  return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
+}
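
On the IsHA check in functionArgumentNeedsConsecutiveRegisters: under
AAPCS-VFP a homogeneous aggregate, a struct of up to four members of one
floating-point type, must be passed either in consecutive VFP registers or
entirely on the stack. A small classifier sketch; the helper and struct below
are hypothetical, not the LLVM API:

#include <cassert>

// Hypothetical summary of a candidate type: e.g. struct { float a,b,c,d; }
// has AllMembersSameFPType = true, NumMembers = 4, and would occupy s0-s3
// as one consecutive block.
struct HACandidate {
  bool AllMembersSameFPType;
  unsigned NumMembers;
};

static bool isHomogeneousAggregate(const HACandidate &T) {
  return T.AllMembersSameFPType && T.NumMembers >= 1 && T.NumMembers <= 4;
}

int main() {
  assert(isHomogeneousAggregate({true, 4}));   // struct { float a,b,c,d; }
  assert(!isHomogeneousAggregate({true, 5}));  // too many members
  assert(!isHomogeneousAggregate({false, 2})); // mixed member types
}
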