X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCISelLowering.cpp;h=4764bf8a529f4dfdd35f39fe97836c46743896cd;hb=c848b1bbcf88ab5d8318d990612fb1fda206ea3d;hp=8195f5eb050c1989ecfbaca19c6108a9068bcd56;hpb=7530a9f7d1e62b28e04d771158613c2954cc8d8c;p=oota-llvm.git diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8195f5eb050..4764bf8a529 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16,6 +16,7 @@ #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" +#include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -36,21 +37,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); - static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -60,17 +46,22 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi static cl::opt DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); +// FIXME: Remove this once the bug has been fixed! +extern cl::opt ANDIGlueBug; + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); + if (TM.getSubtargetImpl()->isSVR4ABI()) + return new PPC64LinuxTargetObjectFile(); + return new TargetLoweringObjectFileELF(); } PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget(); - PPCRegInfo = TM.getRegisterInfo(); setPow2DivIsCheap(); @@ -106,6 +97,39 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); + if (Subtarget->useCRBits()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + if (isPPC64 || Subtarget->hasFPCVT()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, + isPPC64 ? 
MVT::i64 : MVT::i32); + } else { + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); + } + + // PowerPC does not support direct load / store of condition registers + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + + // FIXME: Remove this once the ANDI glue bug is fixed: + if (ANDIGlueBug) + setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); + + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setTruncStoreAction(MVT::i64, MVT::i1, Expand); + setTruncStoreAction(MVT::i32, MVT::i1, Expand); + setTruncStoreAction(MVT::i16, MVT::i1, Expand); + setTruncStoreAction(MVT::i8, MVT::i1, Expand); + + addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); + } + // This is used in the ppcf128->int sequence. Note it has different semantics // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); @@ -161,28 +185,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) Subtarget->hasFRSQRTES() && Subtarget->hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + if (Subtarget->hasFCPSGN()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); + } else { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + } if (Subtarget->hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - - // frin does not implement "ties to even." Thus, this is safe only in - // fast-math mode. - if (TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - - // These need to set FE_INEXACT, and use a custom inserter. - setOperationAction(ISD::FRINT, MVT::f64, Legal); - setOperationAction(ISD::FRINT, MVT::f32, Legal); - } + setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, CTPOP or CTTZ @@ -207,21 +227,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); - // PowerPC does not have Select - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::i64, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); + if (!Subtarget->useCRBits()) { + // PowerPC does not have Select + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + } // PowerPC wants to turn select_cc of FP into fsel when possible. 
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit - setOperationAction(ISD::SETCC, MVT::i32, Custom); + if (!Subtarget->useCRBits()) + setOperationAction(ISD::SETCC, MVT::i32, Custom); // PowerPC does not have BRCOND which requires SetCC - setOperationAction(ISD::BRCOND, MVT::Other, Expand); + if (!Subtarget->useCRBits()) + setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -240,11 +264,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no @@ -297,8 +316,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } else setOperationAction(ISD::VAARG, MVT::Other, Expand); + if (Subtarget->isSVR4ABI() && !isPPC64) + // VACOPY is custom lowered with the 32-bit SVR4 ABI. + setOperationAction(ISD::VACOPY , MVT::Other, Custom); + else + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + // Use the default implementation. - setOperationAction(ISD::VACOPY , MVT::Other, Expand); setOperationAction(ISD::VAEND , MVT::Other, Expand); setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); @@ -308,6 +332,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + // To handle counter-based loop conditions. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); + // Comparisons that require checking two conditions. setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); @@ -406,6 +433,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); @@ -457,7 +485,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); - setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT, MVT::v4i32, + Subtarget->useCRBits() ? 
Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); @@ -476,7 +505,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); - if (TM.Options.UnsafeFPMath) { + if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } @@ -500,6 +529,86 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); + + setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); + + if (Subtarget->hasVSX()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::MUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMA, MVT::v2f64, Legal); + + setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + + setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); + setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); + setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); + setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); + + // Share the Altivec comparison restrictions. + setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand); + + setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); + + setOperationAction(ISD::LOAD, MVT::v2f64, Legal); + setOperationAction(ISD::STORE, MVT::v2f64, Legal); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); + + addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); + + addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); + addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); + + // VSX v2i64 only supports non-arithmetic operations. 
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SUB, MVT::v2i64, Expand); + + setOperationAction(ISD::SHL, MVT::v2i64, Expand); + setOperationAction(ISD::SRA, MVT::v2i64, Expand); + setOperationAction(ISD::SRL, MVT::v2i64, Expand); + + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + + setOperationAction(ISD::LOAD, MVT::v2i64, Promote); + AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); + setOperationAction(ISD::STORE, MVT::v2i64, Promote); + AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); + + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + + // Vector operation legalization checks the result type of + // SIGN_EXTEND_INREG, overall legalization checks the inner type. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); + + addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); + } } if (Subtarget->has64BitSupport()) { @@ -513,7 +622,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + // Altivec instructions set fields to all zeros or all ones. + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); @@ -527,9 +637,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); + if (Subtarget->useCRBits()) + setTargetDAGCombine(ISD::BRCOND); setTargetDAGCombine(ISD::BSWAP); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + + if (Subtarget->useCRBits()) { + setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::SELECT_CC); + } // Use reciprocal estimates. if (TM.Options.UnsafeFPMath) { @@ -551,6 +675,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); } + // With 32 condition bits, we don't need to sink (and duplicate) compares + // aggressively in CodeGenPrep. + if (Subtarget->useCRBits()) + setHasMultipleConditionRegisters(); + setMinFunctionAlignment(2); if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); @@ -562,7 +691,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setInsertFencesForAtomic(true); - setSchedulingPreference(Sched::Hybrid); + if (Subtarget->enableMachineScheduler()) + setSchedulingPreference(Sched::Source); + else + setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(); @@ -581,29 +713,52 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } } +/// getMaxByValAlign - Helper for getByValTypeAlignment to determine +/// the desired ByVal argument alignment. 
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, + unsigned MaxMaxAlign) { + if (MaxAlign == MaxMaxAlign) + return; + if (VectorType *VTy = dyn_cast(Ty)) { + if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) + MaxAlign = 32; + else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) + MaxAlign = 16; + } else if (ArrayType *ATy = dyn_cast(Ty)) { + unsigned EltAlign = 0; + getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); + if (EltAlign > MaxAlign) + MaxAlign = EltAlign; + } else if (StructType *STy = dyn_cast(Ty)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + unsigned EltAlign = 0; + getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); + if (EltAlign > MaxAlign) + MaxAlign = EltAlign; + if (MaxAlign == MaxMaxAlign) + break; + } + } +} + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { - const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. - if (TM.getSubtarget().isDarwin()) + if (PPCSubTarget.isDarwin()) return 4; // 16byte and wider vectors are passed on 16byte boundary. - if (VectorType *VTy = dyn_cast(Ty)) - if (VTy->getBitWidth() >= 128) - return 16; - // The rest is 8 on PPC64 and 4 on PPC32 boundary. - if (PPCSubTarget.isPPC64()) - return 8; - - return 4; + unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4; + if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX()) + getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16); + return Align; } const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case PPCISD::FSEL: return "PPCISD::FSEL"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; @@ -632,7 +787,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; - case PPCISD::MFCR: return "PPCISD::MFCR"; + case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; @@ -640,6 +795,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::LARX: return "PPCISD::LARX"; case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; + case PPCISD::BDNZ: return "PPCISD::BDNZ"; + case PPCISD::BDZ: return "PPCISD::BDZ"; case PPCISD::MFFS: return "PPCISD::MFFS"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; @@ -648,6 +805,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; + case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; @@ -660,12 +818,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; + case PPCISD::SC: return "PPCISD::SC"; } } -EVT PPCTargetLowering::getSetCCResultType(EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) - return MVT::i32; + return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32; return VT.changeVectorElementTypeToInteger(); } @@ -731,8 +890,8 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { /// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { - assert(N->getValueType(0) == MVT::v16i8 && - "PPC only supports shuffles by bytes!"); + if (N->getValueType(0) != MVT::v16i8) + return false; assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); @@ -769,8 +928,8 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { - assert(N->getValueType(0) == MVT::v16i8 && - "PPC only supports shuffles by bytes!"); + if (N->getValueType(0) != MVT::v16i8) + return -1; ShuffleVectorSDNode *SVOp = cast(N); @@ -860,7 +1019,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { /// the constant being splatted. The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { - SDValue OpVal(0, 0); + SDValue OpVal(nullptr, 0); // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where @@ -879,7 +1038,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { if (!isa(N->getOperand(i))) return SDValue(); - if (UniquedVals[i&(Multiple-1)].getNode() == 0) + if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. @@ -894,21 +1053,21 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { - if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs. + if (!UniquedVals[i].getNode()) continue; // Must have been undefs. LeadingZero &= cast(UniquedVals[i])->isNullValue(); LeadingOnes &= cast(UniquedVals[i])->isAllOnesValue(); } // Finally, check the least significant entry. if (LeadingZero) { - if (UniquedVals[Multiple-1].getNode() == 0) + if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef int Val = cast(UniquedVals[Multiple-1])->getZExtValue(); if (Val < 16) return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) } if (LeadingOnes) { - if (UniquedVals[Multiple-1].getNode() == 0) + if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef int Val =cast(UniquedVals[Multiple-1])->getSExtValue(); if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) @@ -921,13 +1080,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // Check to see if this buildvec has a single non-undef value in its elements. 
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) + if (!OpVal.getNode()) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) return SDValue(); } - if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def. + if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. unsigned ValSizeInBytes = EltSize; uint64_t Value = 0; @@ -1034,24 +1193,68 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, return false; } +// If we happen to be doing an i64 load or store into a stack slot that has +// less than a 4-byte alignment, then the frame-index elimination may need to +// use an indexed load or store instruction (because the offset may not be a +// multiple of 4). The extra register needed to hold the offset comes from the +// register scavenger, and it is possible that the scavenger will need to use +// an emergency spill slot. As a result, we need to make sure that a spill slot +// is allocated when doing an i64 load/store into a less-than-4-byte-aligned +// stack slot. +static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { + // FIXME: This does not handle the LWA case. + if (VT != MVT::i64) + return; + + // NOTE: We'll exclude negative FIs here, which come from argument + // lowering, because there are no known test cases triggering this problem + // using packed structures (or similar). We can remove this exclusion if + // we find such a test case. The reason why this is so test-case driven is + // because this entire 'fixup' is only to prevent crashes (from the + // register scavenger) on not-really-valid inputs. For example, if we have: + // %a = alloca i1 + // %b = bitcast i1* %a to i64* + // store i64* a, i64 b + // then the store should really be marked as 'align 1', but is not. If it + // were marked as 'align 1' then the indexed form would have been + // instruction-selected initially, and the problem this 'fixup' is preventing + // won't happen regardless. + if (FrameIdx < 0) + return; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + if (Align >= 4) + return; + + PPCFunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setHasNonRISpills(); +} + /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better -/// represented as reg+reg. +/// represented as reg+reg. If Aligned is true, only accept displacements +/// suitable for STD and friends, i.e. multiples of 4. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG) const { + SelectionDAG &DAG, + bool Aligned) const { // FIXME dl should come from parent load or store, not from address - DebugLoc dl = N.getDebugLoc(); + SDLoc dl(N); // If this can be more profitably realized as r+r, fail. 
if (SelectAddressRegReg(N, Disp, Base, DAG)) return false; if (N.getOpcode() == ISD::ADD) { short imm = 0; - if (isIntS16Immediate(N.getOperand(1), imm)) { - Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); + if (isIntS16Immediate(N.getOperand(1), imm) && + (!Aligned || (imm & 3) == 0)) { + Disp = DAG.getTargetConstant(imm, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } @@ -1070,7 +1273,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } } else if (N.getOpcode() == ISD::OR) { short imm = 0; - if (isIntS16Immediate(N.getOperand(1), imm)) { + if (isIntS16Immediate(N.getOperand(1), imm) && + (!Aligned || (imm & 3) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. @@ -1081,7 +1285,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If all of the bits are known zero on the LHS or RHS, the add won't // carry. Base = N.getOperand(0); - Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); + Disp = DAG.getTargetConstant(imm, N.getValueType()); return true; } } @@ -1091,7 +1295,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" short Imm; - if (isIntS16Immediate(CN, Imm)) { + if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); @@ -1099,8 +1303,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } // Handle 32-bit sext immediates with LIS + addr mode. - if (CN->getValueType(0) == MVT::i32 || - (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { + if ((CN->getValueType(0) == MVT::i32 || + (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && + (!Aligned || (CN->getZExtValue() & 3) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. @@ -1114,9 +1319,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } Disp = DAG.getTargetConstant(0, getPointerTy()); - if (FrameIndexSDNode *FI = dyn_cast(N)) + if (FrameIndexSDNode *FI = dyn_cast(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); + } else Base = N; return true; // [r+0] } @@ -1148,92 +1354,6 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, return true; } -/// SelectAddressRegImmShift - Returns true if the address N can be -/// represented by a base register plus a signed 14-bit displacement -/// [r+imm*4]. Suitable for use by STD and friends. -bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, - SDValue &Base, - SelectionDAG &DAG) const { - // FIXME dl should come from the parent load or store, not the address - DebugLoc dl = N.getDebugLoc(); - // If this can be more profitably realized as r+r, fail. 
- if (SelectAddressRegReg(N, Disp, Base, DAG)) - return false; - - if (N.getOpcode() == ISD::ADD) { - short imm = 0; - if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { - Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); - if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { - Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - } else { - Base = N.getOperand(0); - } - return true; // [r+i] - } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { - // Match LOAD (ADD (X, Lo(G))). - assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() - && "Cannot handle constant offsets yet!"); - Disp = N.getOperand(1).getOperand(0); // The global address. - assert(Disp.getOpcode() == ISD::TargetGlobalAddress || - Disp.getOpcode() == ISD::TargetConstantPool || - Disp.getOpcode() == ISD::TargetJumpTable); - Base = N.getOperand(0); - return true; // [&g+r] - } - } else if (N.getOpcode() == ISD::OR) { - short imm = 0; - if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { - // If this is an or of disjoint bitfields, we can codegen this as an add - // (for better address arithmetic) if the LHS and RHS of the OR are - // provably disjoint. - APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); - if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { - // If all of the bits are known zero on the LHS or RHS, the add won't - // carry. - Base = N.getOperand(0); - Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); - return true; - } - } - } else if (ConstantSDNode *CN = dyn_cast(N)) { - // Loading from a constant address. Verify low two bits are clear. - if ((CN->getZExtValue() & 3) == 0) { - // If this address fits entirely in a 14-bit sext immediate field, codegen - // this as "d, 0" - short Imm; - if (isIntS16Immediate(CN, Imm)) { - Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, - CN->getValueType(0)); - return true; - } - - // Fold the low-part of 32-bit absolute addresses into addr mode. - if (CN->getValueType(0) == MVT::i32 || - (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { - int Addr = (int)CN->getZExtValue(); - - // Otherwise, break this down into an LIS + disp. - Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); - Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); - unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; - Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0); - return true; - } - } - } - - Disp = DAG.getTargetConstant(0, getPointerTy()); - if (FrameIndexSDNode *FI = dyn_cast(N)) - Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else - Base = N; - return true; // [r+0] -} - - /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -1286,18 +1406,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, return true; } - // LDU/STU use reg+imm*4, others use reg+imm. + // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { - // reg + imm - if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. 
if (Alignment < 4) return false; - // reg + imm * 4. - if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) return false; } @@ -1321,9 +1439,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, /// GetLabelAccessInfo - Return true if we should reference labels using a /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, - unsigned &LoOpFlags, const GlobalValue *GV = 0) { - HiOpFlags = PPCII::MO_HA16; - LoOpFlags = PPCII::MO_LO16; + unsigned &LoOpFlags, + const GlobalValue *GV = nullptr) { + HiOpFlags = PPCII::MO_HA; + LoOpFlags = PPCII::MO_LO; // Don't use the pic base if not in PIC relocation model. Or if we are on a // non-darwin platform. We don't support PIC on other platforms yet. @@ -1353,7 +1472,7 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG) { EVT PtrVT = HiPart.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); - DebugLoc DL = HiPart.getDebugLoc(); + SDLoc DL(HiPart); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); @@ -1378,7 +1497,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, // The actual address of the GlobalValue is stored in the TOC. if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); - return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA, + return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); } @@ -1399,7 +1518,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // The actual address of the GlobalValue is stored in the TOC. if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA, + return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); } @@ -1426,8 +1545,12 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + // FIXME: TLS addresses currently use medium model code sequences, + // which is the most useful form. Eventually support for small and + // large models could be added if users need it, at the cost of + // additional complexity. GlobalAddressSDNode *GA = cast(Op); - DebugLoc dl = GA->getDebugLoc(); + SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(); bool is64bit = PPCSubTarget.isPPC64(); @@ -1436,26 +1559,29 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, if (Model == TLSModel::LocalExec) { SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TPREL16_HA); + PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TPREL16_LO); + PPCII::MO_TPREL_LO); SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, is64bit ? 
MVT::i64 : MVT::i32); SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } - if (!is64bit) - llvm_unreachable("only local-exec is currently supported for ppc32"); - if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, - PtrVT, GOTReg, TGA); + SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TLS); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + } else + GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, - PtrVT, TGA, TPOffsetHi); - return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA); + PtrVT, TGA, GOTPtr); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { @@ -1513,7 +1639,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); - DebugLoc DL = GSDN->getDebugLoc(); + SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); // 64-bit SVR4 ABI code is always position-independent. @@ -1544,7 +1670,28 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); + + if (Op.getValueType() == MVT::v2i64) { + // When the operands themselves are v2i64 values, we need to do something + // special because VSX has no underlying comparison operations for these. + if (Op.getOperand(0).getValueType() == MVT::v2i64) { + // Equality can be handled by casting to the legal type for Altivec + // comparisons, everything else needs to be expanded. + if (CC == ISD::SETEQ || CC == ISD::SETNE) { + return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, + DAG.getSetCC(dl, MVT::v4i32, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), + CC)); + } + + return SDValue(); + } + + // We handle most of these in the usual way. 
+ return Op; + } // If we're comparing for equality to zero, expose the fact that this is // implented as a ctlz/srl pair on ppc, so that the dag combiner can @@ -1593,7 +1740,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); @@ -1693,6 +1840,18 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, false, false, false, 0); } +SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const { + assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); + + // We have to copy the entire va_list struct: + // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte + return DAG.getMemcpy(Op.getOperand(0), Op, + Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(12, MVT::i32), 8, false, true, + MachinePointerInfo(), MachinePointerInfo()); +} + SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { return Op.getOperand(0); @@ -1704,7 +1863,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); @@ -1746,7 +1905,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the @@ -1840,19 +1999,25 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +// Function whose sole purpose is to kill compiler warnings +// stemming from unused functions included from PPCGenCallingConv.inc. +CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { + return Flag ? 
CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; +} + +bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { return true; } -static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { - static const uint16_t ArgRegs[] = { +bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const MCPhysReg ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; @@ -1874,12 +2039,12 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { - static const uint16_t ArgRegs[] = { +bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const MCPhysReg ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -1903,8 +2068,8 @@ static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// on Darwin. -static const uint16_t *GetFPR() { - static const uint16_t FPR[] = { +static const MCPhysReg *GetFPR() { + static const MCPhysReg FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 }; @@ -1916,7 +2081,7 @@ static const uint16_t *GetFPR() { /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { - unsigned ArgSize = ArgVT.getSizeInBits()/8; + unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -1929,7 +2094,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { if (PPCSubTarget.isSVR4ABI()) { @@ -1951,7 +2116,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: @@ -2014,6 +2179,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); + case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; @@ -2021,7 +2187,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( RC = &PPC::F4RCRegClass; break; case MVT::f64: - RC = &PPC::F8RCRegClass; + if (PPCSubTarget.hasVSX()) + RC = &PPC::VSFRCRegClass; + else + RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: @@ -2029,18 +2198,26 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( case MVT::v4f32: RC = &PPC::VRRCRegClass; break; + case MVT::v2f64: + case MVT::v2i64: + RC = &PPC::VSHRCRegClass; + break; } // Transform the arguments stored in physical registers into virtual ones. 
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, + ValVT == MVT::i1 ? MVT::i32 : ValVT); + + if (ValVT == MVT::i1) + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + unsigned ArgSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable); @@ -2089,13 +2266,13 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - static const uint16_t GPArgRegs[] = { + static const MCPhysReg GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); - static const uint16_t FPArgRegs[] = { + static const MCPhysReg FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -2168,15 +2345,15 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, - DebugLoc dl) const { + SDLoc dl) const { if (Flags.isSExt()) ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); else if (Flags.isZExt()) ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); - - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); + + return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } // Set the size that is at least reserved in caller of this function. Tail @@ -2211,7 +2388,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // @@ -2229,17 +2406,21 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; - static const uint16_t GPR[] = { + static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; + static const MCPhysReg VSRH[] = { + PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, + PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 + }; const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; @@ -2259,7 +2440,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; - unsigned ObjSize = ObjectVT.getSizeInBits()/8; + unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); @@ -2269,7 +2450,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || - ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { + ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 || + ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) { if (isVarArg) { MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(ObjectVT, @@ -2302,6 +2484,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( InVals.push_back(FIN); continue; } + + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > 8) { + ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; + CurArgOffset = ArgOffset; + } + // All aggregates smaller than 8 bytes must be passed right-justified. if (ObjSize < PtrByteSize) CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); @@ -2320,7 +2509,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, CurArgOffset), + MachinePointerInfo(FuncArg), ObjType, false, false, 0); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), @@ -2332,7 +2521,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg), false, false, 0); } @@ -2356,7 +2545,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2371,13 +2560,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); @@ -2403,7 +2593,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ? + &PPC::VSFRCRegClass : + &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; @@ -2418,10 +2610,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { - unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? 
+ MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : + MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { @@ -2500,7 +2696,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // @@ -2519,18 +2715,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; - static const uint16_t GPR_32[] = { // 32-bit registers. + static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const uint16_t GPR_64[] = { // 64-bit registers. + static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -2541,7 +2737,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have @@ -2568,6 +2764,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( switch(ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: case MVT::f32: VecArgOffset += 4; @@ -2598,17 +2795,17 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SmallVector MemOps; unsigned nAltivecParamsAtEnd = 0; - // FIXME: FuncArg and Ins[ArgNo] must reference the same argument. - // When passing anonymous aggregates, this is currently not true. - // See LowerFormalArguments_64SVR4 for a fix. Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { + unsigned CurArgIdx = 0; + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[ArgNo].OrigArgIndex; unsigned CurArgOffset = ArgOffset; @@ -2652,8 +2849,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? 
MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, - CurArgOffset), + MachinePointerInfo(FuncArg), ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2677,7 +2873,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2692,11 +2888,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin( switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + + if (ObjectVT == MVT::i1) + ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); + ++GPR_idx; } else { needsLoad = true; @@ -2712,7 +2913,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); @@ -2875,7 +3076,8 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, EVT ArgVT = Outs[i].VT; // Varargs Altivec parameters are padded to a 16 byte boundary. if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || - ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { + ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 || + ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) { if (!isVarArg && !isPPC64) { // Non-varargs Altivec parameters go after all the non-Altivec // parameters; handle those later so we know how much padding we need. @@ -2955,7 +3157,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Flags.isByVal()) return false; } - // Non PIC/GOT tail calls are supported. + // Non-PIC/GOT tail calls are supported. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return true; @@ -2973,12 +3175,12 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, /// 32-bit value is representable in the immediate field of a BxA instruction. static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *C = dyn_cast(Op); - if (!C) return 0; + if (!C) return nullptr; int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. SignExtend32<26>(Addr) != Addr) - return 0; // Top 6 bits have to be sext of immediate. + return nullptr; // Top 6 bits have to be sext of immediate. 
return DAG.getConstant((int)C->getZExtValue() >> 2, DAG.getTargetLoweringInfo().getPointerTy()).getNode(); @@ -3000,9 +3202,9 @@ struct TailCallArgumentInfo { static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, - const SmallVector &TailCallArgs, - SmallVector &MemOpChains, - DebugLoc dl) { + const SmallVectorImpl &TailCallArgs, + SmallVectorImpl &MemOpChains, + SDLoc dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; SDValue FIN = TailCallArgs[i].FrameIdxOp; @@ -3024,7 +3226,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int SPDiff, bool isPPC64, bool isDarwinABI, - DebugLoc dl) { + SDLoc dl) { if (SPDiff) { // Calculate the new stack slot for the return address. int SlotSize = isPPC64 ? 8 : 4; @@ -3059,7 +3261,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, - SmallVector& TailCallArguments) { + SmallVectorImpl& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); @@ -3081,7 +3283,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl) const { + SDLoc dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; @@ -3111,11 +3313,11 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(0), - MachinePointerInfo(0)); + false, false, MachinePointerInfo(), + MachinePointerInfo()); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of @@ -3124,9 +3326,9 @@ static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, - bool isVector, SmallVector &MemOpChains, - SmallVector &TailCallArguments, - DebugLoc dl) { + bool isVector, SmallVectorImpl &MemOpChains, + SmallVectorImpl &TailCallArguments, + SDLoc dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); if (!isTailCall) { if (isVector) { @@ -3147,9 +3349,9 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, - DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, + SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, bool isDarwinABI, - SmallVector &TailCallArguments) { + SmallVectorImpl &TailCallArguments) { MachineFunction &MF = DAG.getMachineFunction(); // Emit a sequence of copyto/copyfrom virtual registers for arguments that @@ -3169,15 +3371,15 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, // Emit callseq_end just before tailcall node. 
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); + DAG.getIntPtrConstant(0, true), InFlag, dl); InFlag = Chain.getValue(1); } static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, - SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, - SmallVector, 8> &RegsToPass, - SmallVector &Ops, std::vector &NodeTys, + SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, + SmallVectorImpl > &RegsToPass, + SmallVectorImpl &Ops, std::vector &NodeTys, const PPCSubtarget &PPCSubTarget) { bool isPPC64 = PPCSubTarget.isPPC64(); @@ -3313,7 +3515,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, } Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, - 2 + (InFlag.getNode() != 0)); + InFlag.getNode() ? 3 : 2); InFlag = Chain.getValue(1); NodeTys.clear(); @@ -3321,7 +3523,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); CallOpc = PPCISD::BCTRL; - Callee.setNode(0); + Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) if (isSVR4ABI && isPPC64) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); @@ -3361,7 +3563,7 @@ SDValue PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { SmallVector RVLocs; @@ -3404,7 +3606,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, } SDValue -PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, +PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, bool isVarArg, SelectionDAG &DAG, SmallVector, 8> @@ -3474,7 +3676,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) { + } else if ((CallOpc == PPCISD::CALL) && + (!isLocalCall(Callee) || + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { // Otherwise insert NOP for non-local calls. 
CallOpc = PPCISD::CALL_NOP; } @@ -3491,7 +3695,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), - InFlag); + InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); @@ -3503,10 +3707,10 @@ SDValue PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; @@ -3517,6 +3721,10 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); + if (PPCSubTarget.isSVR4ABI()) { if (PPCSubTarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, @@ -3540,7 +3748,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. @@ -3626,7 +3834,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be moved somewhere else @@ -3677,7 +3886,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1)); + CallSeqStart.getNode()->getOperand(1), + SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); Chain = CallSeqStart = NewCallSeqStart; @@ -3689,6 +3899,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } if (VA.isRegLoc()) { + if (Arg.getValueType() == MVT::i1) + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); + seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -3753,13 +3966,14 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // The MEMCPY must go outside the CALLSEQ_START..END. 
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1)); + CallSeqStart.getNode()->getOperand(1), + SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); return NewCallSeqStart; @@ -3772,7 +3986,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { unsigned NumOps = Outs.size(); @@ -3813,7 +4027,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else @@ -3834,16 +4049,21 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - static const uint16_t GPR[] = { + static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; + static const MCPhysReg VSRH[] = { + PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, + PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 + }; + const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); @@ -3865,7 +4085,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); // Promote integers to 64-bit values. - if (Arg.getValueType() == MVT::i32) { + if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); @@ -3887,6 +4107,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (Size == 0) continue; + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > 8) { + if (BVAlign % PtrByteSize != 0) + llvm_unreachable( + "ByVal alignment is not a multiple of the pointer size"); + + ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; + } + // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); @@ -3938,7 +4167,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // register. // FIXME: The memcpy seems to produce pretty awful code for // small aggregates, particularly for packed ones. - // FIXME: It would be preferable to use the slot in the + // FIXME: It would be preferable to use the slot in the // parameter save area instead of a new local variable. 
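// [Editor's note: illustrative sketch, not part of the patch above.]
// The new ByVal handling in LowerCall_64SVR4 rounds the parameter-save-area
// offset up to the aggregate's alignment before laying the argument down.
// A minimal scalar model of that rounding (hypothetical helper name):
static unsigned RoundUpArgOffset(unsigned ArgOffset, unsigned BVAlign) {
  // Same computation as ((ArgOffset + BVAlign - 1) / BVAlign) * BVAlign above;
  // e.g. an offset of 20 with a 16-byte-aligned aggregate becomes 32.
  return ((ArgOffset + BVAlign - 1) / BVAlign) * BVAlign;
}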
SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); @@ -3978,8 +4207,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, continue; } - switch (Arg.getValueType().getSimpleVT().SimpleTy) { + switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != NumGPRs) { @@ -4001,7 +4231,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // must be passed right-justified in the stack doubleword, and // in the GPR, if one is available. SDValue StoreOff; - if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) { + if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } else @@ -4040,6 +4270,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: if (isVarArg) { // These go aligned on the stack, or in the corresponding R registers // when within range. The Darwin PPC ABI doc claims they also go in @@ -4063,7 +4295,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); + + unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || + Arg.getSimpleValueType() == MVT::v2i64) ? + VSRH[VR_idx] : VR[VR_idx]; + ++VR_idx; + + RegsToPass.push_back(std::make_pair(VReg, Load)); } ArgOffset += 16; for (unsigned i=0; i<16; i+=PtrByteSize) { @@ -4083,7 +4321,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // stack space allocated at the end. if (VR_idx != NumVRs) { // Doesn't have GPR space allocated. - RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); + unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || + Arg.getSimpleValueType() == MVT::v2i64) ? + VSRH[VR_idx] : VR[VR_idx]; + ++VR_idx; + + RegsToPass.push_back(std::make_pair(VReg, Arg)); } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, @@ -4143,7 +4386,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { unsigned NumOps = Outs.size(); @@ -4184,7 +4427,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else @@ -4209,17 +4453,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - static const uint16_t GPR_32[] = { // 32-bit registers. + static const MCPhysReg GPR_32[] = { // 32-bit registers. 
PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const uint16_t GPR_64[] = { // 64-bit registers. + static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -4227,7 +4471,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); - const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; SmallVector, 8> RegsToPass; SmallVector TailCallArguments; @@ -4308,11 +4552,15 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, continue; } - switch (Arg.getValueType().getSimpleVT().SimpleTy) { + switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != NumGPRs) { + if (Arg.getValueType() == MVT::i1) + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, @@ -4500,7 +4748,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + SDLoc dl, SelectionDAG &DAG) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), @@ -4549,7 +4797,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { // When we pop the dynamic allocation we need to restore the SP link. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Get the corect type for pointers. EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -4634,7 +4882,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Get the corect type for pointers. EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -4651,7 +4899,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1)); @@ -4659,11 +4907,60 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1)); } +SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && + "Custom lowering only for i1 loads"); + + // First, load 8 bits into 32 bits, then truncate to 1 bit. 
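// [Editor's note: hedged sketch, not part of the patch; the LowerLOAD body
// continues below and the matching LowerSTORE follows it.]
// With CR-bit tracking enabled, i1 values live in condition-register bits,
// but in memory they still occupy a byte, so the custom i1 LOAD/STORE
// lowering widens through i8: load a byte and truncate to the low bit, or
// zero-extend the bit and do a truncating byte store. Roughly, at the C
// level (hypothetical helper names):
static inline bool LoadI1(const unsigned char *P) {
  return (*P & 1) != 0;   // ext-load 8 bits, keep bit 0 ("truncate to i1")
}
static inline void StoreI1(unsigned char *P, bool B) {
  *P = B ? 1 : 0;         // zero-extend the bit, then truncating 8-bit store
}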
+ + SDLoc dl(Op); + LoadSDNode *LD = cast(Op); + + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand *MMO = LD->getMemOperand(); + + SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, + BasePtr, MVT::i8, MMO); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); + + SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; + return DAG.getMergeValues(Ops, 2, dl); +} + +SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(1).getValueType() == MVT::i1 && + "Custom lowering only for i1 stores"); + + // First, zero extend to 32 bits, then use a truncating store to 8 bits. + + SDLoc dl(Op); + StoreSDNode *ST = cast(Op); + + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + SDValue Value = ST->getValue(); + MachineMemOperand *MMO = ST->getMemOperand(); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); + return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); +} + +// FIXME: Remove this once the ANDI glue bug is fixed: +SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && + "Custom lowering only for i1 results"); + + SDLoc DL(Op); + return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, + Op.getOperand(0)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -4672,22 +4969,37 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; - ISD::CondCode CC = cast(Op.getOperand(4))->get(); + // We might be able to do better than this under some circumstances, but in + // general, fsel-based lowering of select is a finite-math-only optimization. + // For more information, see section F.3 of the 2.06 ISA specification. + if (!DAG.getTarget().Options.NoInfsFPMath || + !DAG.getTarget().Options.NoNaNsFPMath) + return Op; - // Cannot handle SETEQ/SETNE. - if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op; + ISD::CondCode CC = cast(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); EVT CmpVT = Op.getOperand(0).getValueType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. + SDValue Sel1; if (isFloatingPointZero(RHS)) switch (CC) { default: break; // SETUO etc aren't handled by fsel. + case ISD::SETNE: + std::swap(TV, FV); + case ISD::SETEQ: + if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); + Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); + if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits + Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt @@ -4710,44 +5022,55 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. 
+ case ISD::SETNE: + std::swap(TV, FV); + case ISD::SETEQ: + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits + Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); } return Op; } // FIXME: Split this code up when LegalizeDAGTypes lands. SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; - switch (Op.getValueType().getSimpleVT().SimpleTy) { + switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : @@ -4799,11 +5122,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Don't handle ppc_fp128 here; let it be lowered to a libcall. 
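// [Editor's note on the LowerSELECT_CC changes above: illustrative sketch,
// not part of the patch.]
// fsel computes d = (a >= 0.0) ? b : c, so the newly added SETEQ/SETNE cases
// need two selects: the first keeps TV only when LHS >= 0, the second keeps
// that result only when -LHS >= 0, so TV survives exactly when LHS == 0.
// A scalar model of the emitted sequence:
static double FselModel(double A, double B, double C) {
  return A >= 0.0 ? B : C;                  // behaves like the fsel instruction
}
static double SelectEqZero(double LHS, double TV, double FV) {
  double Sel1 = FselModel(LHS, TV, FV);     // LHS >= 0 ? TV : FV
  return FselModel(-LHS, Sel1, FV);         // LHS <= 0 ? Sel1 : FV
}
// As the new guard notes, this is only done under NoInfsFPMath and
// NoNaNsFPMath, since fsel-based select lowering is a finite-math-only
// optimization.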
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); + if (Op.getOperand(0).getValueType() == MVT::i1) + return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), + DAG.getConstantFP(1.0, Op.getValueType()), + DAG.getConstantFP(0.0, Op.getValueType())); + assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); @@ -4933,7 +5261,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); /* The rounding mode is in bits 30:31 of FPSR, and has the following settings: @@ -4999,7 +5327,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SHL!"); @@ -5027,7 +5355,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && @@ -5055,7 +5383,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { } SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && @@ -5090,7 +5418,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { /// BuildSplatI - Build a canonical splati of Val with an element size of /// SplatSize. Cast the result to VT. static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG, SDLoc dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); static const EVT VTys[] = { // canonical VT to use for each size. @@ -5114,10 +5442,20 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); } +/// BuildIntrinsicOp - Return a unary operator intrinsic node with the +/// specified intrinsic ID. +static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, + SelectionDAG &DAG, SDLoc dl, + EVT DestVT = MVT::Other) { + if (DestVT == MVT::Other) DestVT = Op.getValueType(); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(IID, MVT::i32), Op); +} + /// BuildIntrinsicOp - Return a binary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, DebugLoc dl, + SelectionDAG &DAG, SDLoc dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = LHS.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, @@ -5128,7 +5466,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, /// specified intrinsic ID. 
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, SDValue Op2, SelectionDAG &DAG, - DebugLoc dl, EVT DestVT = MVT::Other) { + SDLoc dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op0.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); @@ -5138,7 +5476,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, - EVT VT, SelectionDAG &DAG, DebugLoc dl) { + EVT VT, SelectionDAG &DAG, SDLoc dl) { // Force LHS/RHS to be the right type. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); @@ -5157,7 +5495,7 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, // sequence of ops that should be used. SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); @@ -5313,7 +5651,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); @@ -5392,7 +5730,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast(Op); @@ -5559,7 +5897,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); int CompareOpc; bool isDot; if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) @@ -5584,7 +5922,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. - SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32, + SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1)); @@ -5621,9 +5959,33 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return Flags; } +SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int + // instructions), but for smaller types, we need to first extend up to v2i32 + // before doing going farther. 
+ if (Op.getValueType() == MVT::v2i64) { + EVT ExtVT = cast(Op.getOperand(1))->getVT(); + if (ExtVT != MVT::v2i32) { + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); + Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, + DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), + ExtVT.getVectorElementType(), 4))); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); + Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, + DAG.getValueType(MVT::v2i32)); + } + + return Op; + } + + return SDValue(); +} + SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); @@ -5640,7 +6002,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); @@ -5717,6 +6079,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VAARG: return LowerVAARG(Op, DAG, PPCSubTarget); + case ISD::VACOPY: + return LowerVACOPY(Op, DAG, PPCSubTarget); + case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); @@ -5724,10 +6089,13 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, - Op.getDebugLoc()); + SDLoc(Op)); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); @@ -5742,8 +6110,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + // For counter-based loop handling. + case ISD::INTRINSIC_W_CHAIN: return SDValue(); + // Frame & Return address. 
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); @@ -5754,10 +6126,26 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const { const TargetMachine &TM = getTargetMachine(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); + case ISD::INTRINSIC_W_CHAIN: { + if (cast(N->getOperand(1))->getZExtValue() != + Intrinsic::ppc_is_decremented_ctr_nonzero) + break; + + assert(N->getValueType(0) == MVT::i1 && + "Unexpected result type for CTR decrement intrinsic"); + EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); + SDVTList VTs = DAG.getVTList(SVT, MVT::Other); + SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), + N->getOperand(1)); + + Results.push_back(NewInt); + Results.push_back(NewInt.getValue(1)); + break; + } case ISD::VAARG: { if (!TM.getSubtarget().isSVR4ABI() || TM.getSubtarget().isPPC64()) @@ -5793,6 +6181,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + // LowerFP_TO_INT() can only handle f32 and f64. + if (N->getOperand(0).getValueType() == MVT::ppcf128) + return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; } @@ -5825,8 +6216,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -5894,8 +6284,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -6047,7 +6436,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, - llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // Note that the structure of the jmp_buf used here is not compatible @@ -6064,6 +6453,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // thisMBB: const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); + const int64_t BPOffset = 4 * PVT.getStoreSize(); // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); @@ -6073,15 +6463,32 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) - .addImm(TOCOffset / 4) + .addImm(TOCOffset) .addReg(BufReg); - MIB.setMemRefs(MMOBegin, MMOEnd); } + // Naked functions never have a base pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned BaseReg; + if (MF->getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + BaseReg = PPCSubTarget.isPPC64() ? 
PPC::X1 : PPC::R1; + else + BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP; + + MIB = BuildMI(*thisMBB, MI, DL, + TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW)) + .addReg(BaseReg) + .addImm(BPOffset) + .addReg(BufReg); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Setup - MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB); - MIB.addRegMask(PPCRegInfo->getNoPreservedMask()); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); + const PPCRegisterInfo *TRI = + static_cast(getTargetMachine().getRegisterInfo()); + MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); @@ -6101,7 +6508,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, if (PPCSubTarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) .addReg(LabelReg) - .addImm(LabelOffset / 4) + .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) @@ -6148,12 +6555,14 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30; MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); + const int64_t BPOffset = 4 * PVT.getStoreSize(); unsigned BufReg = MI->getOperand(0).getReg(); @@ -6174,7 +6583,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Reload IP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) - .addImm(LabelOffset / 4) + .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) @@ -6186,7 +6595,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Reload SP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) - .addImm(SPOffset / 4) + .addImm(SPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) @@ -6195,13 +6604,22 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, } MIB.setMemRefs(MMOBegin, MMOEnd); - // FIXME: When we also support base pointers, that register must also be - // restored here. + // Reload BP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) + .addImm(BPOffset) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) + .addImm(BPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) - .addImm(TOCOffset / 4) + .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); @@ -6238,37 +6656,32 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8)) { - unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ? 
- PPC::ISEL8 : PPC::ISEL; - unsigned SelectPred = MI->getOperand(4).getImm(); - DebugLoc dl = MI->getDebugLoc(); - - unsigned SubIdx; - bool SwapOps; - switch (SelectPred) { - default: llvm_unreachable("invalid predicate for isel"); - case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; - case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; - case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; - case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; - case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; - case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; - case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; - case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; - } + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8)) { + SmallVector Cond; + if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8) + Cond.push_back(MI->getOperand(4)); + else + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); + Cond.push_back(MI->getOperand(1)); - BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(SwapOps? 3 : 2).getReg()) - .addReg(MI->getOperand(SwapOps? 2 : 3).getReg()) - .addReg(MI->getOperand(1).getReg(), 0, SubIdx); + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), + Cond, MI->getOperand(2).getReg(), + MI->getOperand(3).getReg()); } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_VRRC) { - - + MI->getOpcode() == PPC::SELECT_CC_VRRC || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8 || + MI->getOpcode() == PPC::SELECT_F4 || + MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_VRRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. @@ -6282,23 +6695,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. 
BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + if (MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8 || + MI->getOpcode() == PPC::SELECT_F4 || + MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_VRRC) { + BuildMI(BB, dl, TII->get(PPC::BC)) + .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + } else { + unsigned SelectPred = MI->getOperand(4).getImm(); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + } // copy0MBB: // %FalseValue = ... @@ -6400,8 +6821,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: @@ -6471,8 +6891,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -6621,51 +7040,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); - } else if (MI->getOpcode() == PPC::FRINDrint || - MI->getOpcode() == PPC::FRINSrint) { - bool isf32 = MI->getOpcode() == PPC::FRINSrint; - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - DebugLoc dl = MI->getDebugLoc(); + } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT || + MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { + unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? + PPC::ANDIo8 : PPC::ANDIo; + bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); - - // Perform the rounding. - BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest) - .addReg(Src); - - // Compare the results. - BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg) - .addReg(Dest).addReg(Src); + unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? + &PPC::GPRCRegClass : + &PPC::G8RCRegClass); - // If the results were not equal, then set the FPSCR XX bit. - MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, midMBB); - F->insert(It, exitMBB); - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - BuildMI(*BB, MI, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB); - - BB->addSuccessor(midMBB); - BB->addSuccessor(exitMBB); - - BB = midMBB; - - // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set - // the FI bit here because that will not automatically set XX also, - // and XX is what libm interprets as the FE_INEXACT flag. 
- BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6); - BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); - - BB->addSuccessor(exitMBB); - - BB = exitMBB; + DebugLoc dl = MI->getDebugLoc(); + BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) + .addReg(MI->getOperand(1).getReg()).addImm(1); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), + MI->getOperand(0).getReg()) + .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -6687,7 +7082,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) || (VT == MVT::f64 && PPCSubTarget.hasFRE()) || - (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { + (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) || + (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal, we need to find the zero of the function: @@ -6705,7 +7101,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, ++Iterations; SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue FPOne = DAG.getConstantFP(1.0, VT.getScalarType()); @@ -6749,7 +7145,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || - (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { + (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) || + (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal sqrt, we need to find the zero of the function: @@ -6767,7 +7164,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, ++Iterations; SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue FPThreeHalves = DAG.getConstantFP(1.5, VT.getScalarType()); @@ -6811,11 +7208,650 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, return SDValue(); } +// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does +// not enforce equality of the chain operands. 
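// [Editor's note on DAGCombineFastRecip/DAGCombineFastRecipFSQRT above:
// illustrative sketch, not part of the patch; isConsecutiveLS continues
// below.]
// The reciprocal combines refine the hardware estimate (fre/frsqrte) with
// Newton iterations. One scalar step of each refinement, following the
// derivations the comments above describe:
static double RefineRecip(double A, double Est) {
  // Zero of F(X) = A*X - 1:  X1 = X0 + X0*(1 - A*X0)
  return Est + Est * (1.0 - A * Est);
}
static double RefineRecipSqrt(double A, double Est) {
  // Zero of F(X) = 1/X^2 - A:  X1 = X0*(1.5 - 0.5*A*X0*X0)
  return Est * (1.5 - 0.5 * A * Est * Est);
}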
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, + unsigned Bytes, int Dist, + SelectionDAG &DAG) { + EVT VT = LS->getMemoryVT(); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LS->getBasePtr(); + SDValue BaseLoc = Base->getBasePtr(); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + int FI = cast(Loc)->getIndex(); + int BFI = cast(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + + // Handle X+C + if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && + cast(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + return true; + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const GlobalValue *GV1 = nullptr; + const GlobalValue *GV2 = nullptr; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + +// Return true is there is a nearyby consecutive load to the one provided +// (regardless of alignment). We search up and down the chain, looking though +// token factors and other loads (but nothing else). As a result, a true +// results indicates that it is safe to create a new consecutive load adjacent +// to the load provided. +static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { + SDValue Chain = LD->getChain(); + EVT VT = LD->getMemoryVT(); + + SmallSet LoadRoots; + SmallVector Queue(1, Chain.getNode()); + SmallSet Visited; + + // First, search up the chain, branching to follow all token-factor operands. + // If we find a consecutive load, then we're done, otherwise, record all + // nodes just above the top-level loads and token factors. + while (!Queue.empty()) { + SDNode *ChainNext = Queue.pop_back_val(); + if (!Visited.insert(ChainNext)) + continue; + + if (LoadSDNode *ChainLD = dyn_cast(ChainNext)) { + if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) + return true; + + if (!Visited.count(ChainLD->getChain().getNode())) + Queue.push_back(ChainLD->getChain().getNode()); + } else if (ChainNext->getOpcode() == ISD::TokenFactor) { + for (SDNode::op_iterator O = ChainNext->op_begin(), + OE = ChainNext->op_end(); O != OE; ++O) + if (!Visited.count(O->getNode())) + Queue.push_back(O->getNode()); + } else + LoadRoots.insert(ChainNext); + } + + // Second, search down the chain, starting from the top-level nodes recorded + // in the first phase. These top-level nodes are the nodes just above all + // loads and token factors. Starting with their uses, recursively look though + // all loads (just the chain uses) and token factors to find a consecutive + // load. 
+ Visited.clear(); + Queue.clear(); + + for (SmallSet::iterator I = LoadRoots.begin(), + IE = LoadRoots.end(); I != IE; ++I) { + Queue.push_back(*I); + + while (!Queue.empty()) { + SDNode *LoadRoot = Queue.pop_back_val(); + if (!Visited.insert(LoadRoot)) + continue; + + if (LoadSDNode *ChainLD = dyn_cast(LoadRoot)) + if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) + return true; + + for (SDNode::use_iterator UI = LoadRoot->use_begin(), + UE = LoadRoot->use_end(); UI != UE; ++UI) + if (((isa(*UI) && + cast(*UI)->getChain().getNode() == LoadRoot) || + UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) + Queue.push_back(*UI); + } + } + + return false; +} + +SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + + assert(PPCSubTarget.useCRBits() && + "Expecting to be tracking CR bits"); + // If we're tracking CR bits, we need to be careful that we don't have: + // trunc(binary-ops(zext(x), zext(y))) + // or + // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) + // such that we're unnecessarily moving things into GPRs when it would be + // better to keep them in CR bits. + + // Note that trunc here can be an actual i1 trunc, or can be the effective + // truncation that comes from a setcc or select_cc. + if (N->getOpcode() == ISD::TRUNCATE && + N->getValueType(0) != MVT::i1) + return SDValue(); + + if (N->getOperand(0).getValueType() != MVT::i32 && + N->getOperand(0).getValueType() != MVT::i64) + return SDValue(); + + if (N->getOpcode() == ISD::SETCC || + N->getOpcode() == ISD::SELECT_CC) { + // If we're looking at a comparison, then we need to make sure that the + // high bits (all except for the first) don't matter the result. + ISD::CondCode CC = + cast(N->getOperand( + N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); + unsigned OpBits = N->getOperand(0).getValueSizeInBits(); + + if (ISD::isSignedIntSetCC(CC)) { + if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || + DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) + return SDValue(); + } else if (ISD::isUnsignedIntSetCC(CC)) { + if (!DAG.MaskedValueIsZero(N->getOperand(0), + APInt::getHighBitsSet(OpBits, OpBits-1)) || + !DAG.MaskedValueIsZero(N->getOperand(1), + APInt::getHighBitsSet(OpBits, OpBits-1))) + return SDValue(); + } else { + // This is neither a signed nor an unsigned comparison, just make sure + // that the high bits are equal. + APInt Op1Zero, Op1One; + APInt Op2Zero, Op2One; + DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One); + DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One); + + // We don't really care about what is known about the first bit (if + // anything), so clear it in all masks prior to comparing them. + Op1Zero.clearBit(0); Op1One.clearBit(0); + Op2Zero.clearBit(0); Op2One.clearBit(0); + + if (Op1Zero != Op2Zero || Op1One != Op2One) + return SDValue(); + } + } + + // We now know that the higher-order bits are irrelevant, we just need to + // make sure that all of the intermediate operations are bit operations, and + // all inputs are extensions. 
+ if (N->getOperand(0).getOpcode() != ISD::AND && + N->getOperand(0).getOpcode() != ISD::OR && + N->getOperand(0).getOpcode() != ISD::XOR && + N->getOperand(0).getOpcode() != ISD::SELECT && + N->getOperand(0).getOpcode() != ISD::SELECT_CC && + N->getOperand(0).getOpcode() != ISD::TRUNCATE && + N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && + N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + + if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && + N->getOperand(1).getOpcode() != ISD::AND && + N->getOperand(1).getOpcode() != ISD::OR && + N->getOperand(1).getOpcode() != ISD::XOR && + N->getOperand(1).getOpcode() != ISD::SELECT && + N->getOperand(1).getOpcode() != ISD::SELECT_CC && + N->getOperand(1).getOpcode() != ISD::TRUNCATE && + N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && + N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + + SmallVector Inputs; + SmallVector BinOps, PromOps; + SmallPtrSet Visited; + + for (unsigned i = 0; i < 2; ++i) { + if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && + N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || + isa(N->getOperand(i))) + Inputs.push_back(N->getOperand(i)); + else + BinOps.push_back(N->getOperand(i)); + + if (N->getOpcode() == ISD::TRUNCATE) + break; + } + + // Visit all inputs, collect all binary operations (and, or, xor and + // select) that are all fed by extensions. + while (!BinOps.empty()) { + SDValue BinOp = BinOps.back(); + BinOps.pop_back(); + + if (!Visited.insert(BinOp.getNode())) + continue; + + PromOps.push_back(BinOp); + + for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { + // The condition of the select is not promoted. + if (BinOp.getOpcode() == ISD::SELECT && i == 0) + continue; + if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) + continue; + + if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && + BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || + isa(BinOp.getOperand(i))) { + Inputs.push_back(BinOp.getOperand(i)); + } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || + BinOp.getOperand(i).getOpcode() == ISD::OR || + BinOp.getOperand(i).getOpcode() == ISD::XOR || + BinOp.getOperand(i).getOpcode() == ISD::SELECT || + BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || + BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || + BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { + BinOps.push_back(BinOp.getOperand(i)); + } else { + // We have an input that is not an extension or another binary + // operation; we'll abort this transformation. + return SDValue(); + } + } + } + + // Make sure that this is a self-contained cluster of operations (which + // is not quite the same thing as saying that everything has only one + // use). 
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa(Inputs[i])) + continue; + + for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), + UE = Inputs[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == Inputs[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == Inputs[i] || + User->getOperand(1) == Inputs[i]) + return SDValue(); + } + } + } + + for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { + for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), + UE = PromOps[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == PromOps[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == PromOps[i] || + User->getOperand(1) == PromOps[i]) + return SDValue(); + } + } + } + + // Replace all inputs with the extension operand. + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + // Constants may have users outside the cluster of to-be-promoted nodes, + // and so we need to replace those as we do the promotions. + if (isa(Inputs[i])) + continue; + else + DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); + } + + // Replace all operations (these are all the same, but have a different + // (i1) return type). DAG.getNode will validate that the types of + // a binary operator match, so go through the list in reverse so that + // we've likely promoted both operands first. Any intermediate truncations or + // extensions disappear. + while (!PromOps.empty()) { + SDValue PromOp = PromOps.back(); + PromOps.pop_back(); + + if (PromOp.getOpcode() == ISD::TRUNCATE || + PromOp.getOpcode() == ISD::SIGN_EXTEND || + PromOp.getOpcode() == ISD::ZERO_EXTEND || + PromOp.getOpcode() == ISD::ANY_EXTEND) { + if (!isa(PromOp.getOperand(0)) && + PromOp.getOperand(0).getValueType() != MVT::i1) { + // The operand is not yet ready (see comment below). 
+        PromOps.insert(PromOps.begin(), PromOp);
+        continue;
+      }
+
+      SDValue RepValue = PromOp.getOperand(0);
+      if (isa<ConstantSDNode>(RepValue))
+        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+      continue;
+    }
+
+    unsigned C;
+    switch (PromOp.getOpcode()) {
+    default:             C = 0; break;
+    case ISD::SELECT:    C = 1; break;
+    case ISD::SELECT_CC: C = 2; break;
+    }
+
+    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+         PromOp.getOperand(C).getValueType() != MVT::i1) ||
+        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+      // The to-be-promoted operands of this node have not yet been
+      // promoted (this should be rare because we're going through the
+      // list backward, but if one of the operands has several users in
+      // this cluster of to-be-promoted nodes, it is possible).
+      PromOps.insert(PromOps.begin(), PromOp);
+      continue;
+    }
+
+    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+                                PromOp.getNode()->op_end());
+
+    // If there are any constant inputs, make sure they're replaced now.
+    for (unsigned i = 0; i < 2; ++i)
+      if (isa<ConstantSDNode>(Ops[C+i]))
+        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+    DAG.ReplaceAllUsesOfValueWith(PromOp,
+      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1,
+                  Ops.data(), Ops.size()));
+  }
+
+  // Now we're left with the initial truncation itself.
+  if (N->getOpcode() == ISD::TRUNCATE)
+    return N->getOperand(0);
+
+  // Otherwise, this is a comparison. The operands to be compared have just
+  // changed type (to i1), but everything else is the same.
+  return SDValue(N, 0);
+}
+
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  // If we're tracking CR bits, we need to be careful that we don't have:
+  //   zext(binary-ops(trunc(x), trunc(y)))
+  // or
+  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
+  // such that we're unnecessarily moving things into CR bits that can more
+  // efficiently stay in GPRs. Note that if we're not certain that the high
+  // bits are set as required by the final extension, we still may need to do
+  // some masking to get the proper behavior.
+
+  // This same functionality is important on PPC64 when dealing with
+  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+  // the return values of functions. Because it is so similar, it is handled
+  // here as well.
+
+  if (N->getValueType(0) != MVT::i32 &&
+      N->getValueType(0) != MVT::i64)
+    return SDValue();
+
+  if (!((N->getOperand(0).getValueType() == MVT::i1 &&
+        PPCSubTarget.useCRBits()) ||
+       (N->getOperand(0).getValueType() == MVT::i32 &&
+        PPCSubTarget.isPPC64())))
+    return SDValue();
+
+  if (N->getOperand(0).getOpcode() != ISD::AND &&
+      N->getOperand(0).getOpcode() != ISD::OR  &&
+      N->getOperand(0).getOpcode() != ISD::XOR &&
+      N->getOperand(0).getOpcode() != ISD::SELECT &&
+      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+    return SDValue();
+
+  SmallVector<SDValue, 4> Inputs;
+  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+  SmallPtrSet<SDNode *, 16> Visited;
+
+  // Visit all inputs, collect all binary operations (and, or, xor and
+  // select) that are all fed by truncations.
+  while (!BinOps.empty()) {
+    SDValue BinOp = BinOps.back();
+    BinOps.pop_back();
+
+    if (!Visited.insert(BinOp.getNode()))
+      continue;
+
+    PromOps.push_back(BinOp);
+
+    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+      // The condition of the select is not promoted.
+      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+        continue;
+      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+        continue;
+
+      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+          isa<ConstantSDNode>(BinOp.getOperand(i))) {
+        Inputs.push_back(BinOp.getOperand(i));
+      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
+                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+        BinOps.push_back(BinOp.getOperand(i));
+      } else {
+        // We have an input that is not a truncation or another binary
+        // operation; we'll abort this transformation.
+        return SDValue();
+      }
+    }
+  }
+
+  // Make sure that this is a self-contained cluster of operations (which
+  // is not quite the same thing as saying that everything has only one
+  // use).
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+
+    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+         UE = Inputs[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) or SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == Inputs[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == Inputs[i] ||
+            User->getOperand(1) == Inputs[i])
+          return SDValue();
+      }
+    }
+  }
+
+  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+         UE = PromOps[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) or SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == PromOps[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == PromOps[i] ||
+            User->getOperand(1) == PromOps[i])
+          return SDValue();
+      }
+    }
+  }
+
+  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
+  bool ReallyNeedsExt = false;
+  if (N->getOpcode() != ISD::ANY_EXTEND) {
+    // If all of the inputs are not already sign/zero extended, then
+    // we'll still need to do that at the end.
+    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+      if (isa<ConstantSDNode>(Inputs[i]))
+        continue;
+
+      unsigned OpBits =
+        Inputs[i].getOperand(0).getValueSizeInBits();
+      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+                                  APInt::getHighBitsSet(OpBits,
+                                                        OpBits-PromBits))) ||
+          (N->getOpcode() == ISD::SIGN_EXTEND &&
+           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+             (OpBits-(PromBits-1)))) {
+        ReallyNeedsExt = true;
+        break;
+      }
+    }
+  }
+
+  // Replace all inputs, either with the truncation operand, or a
+  // truncation or extension to the final output type.
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    // Constant inputs need to be replaced with the to-be-promoted nodes that
+    // use them because they might have users outside of the cluster of
+    // promoted nodes.
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+
+    SDValue InSrc = Inputs[i].getOperand(0);
+    if (Inputs[i].getValueType() == N->getValueType(0))
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+    else if (N->getOpcode() == ISD::SIGN_EXTEND)
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+    else if (N->getOpcode() == ISD::ZERO_EXTEND)
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+    else
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+  }
+
+  // Replace all operations (these are all the same, but have a different
+  // (promoted) return type). DAG.getNode will validate that the types of
+  // a binary operator match, so go through the list in reverse so that
+  // we've likely promoted both operands first.
+  while (!PromOps.empty()) {
+    SDValue PromOp = PromOps.back();
+    PromOps.pop_back();
+
+    unsigned C;
+    switch (PromOp.getOpcode()) {
+    default:             C = 0; break;
+    case ISD::SELECT:    C = 1; break;
+    case ISD::SELECT_CC: C = 2; break;
+    }
+
+    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+      // The to-be-promoted operands of this node have not yet been
+      // promoted (this should be rare because we're going through the
+      // list backward, but if one of the operands has several users in
+      // this cluster of to-be-promoted nodes, it is possible).
+      PromOps.insert(PromOps.begin(), PromOp);
+      continue;
+    }
+
+    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+                                PromOp.getNode()->op_end());
+
+    // If this node has constant inputs, then they'll need to be promoted here.
+    for (unsigned i = 0; i < 2; ++i) {
+      if (!isa<ConstantSDNode>(Ops[C+i]))
+        continue;
+      if (Ops[C+i].getValueType() == N->getValueType(0))
+        continue;
+
+      if (N->getOpcode() == ISD::SIGN_EXTEND)
+        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+      else if (N->getOpcode() == ISD::ZERO_EXTEND)
+        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+      else
+        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+    }
+
+    DAG.ReplaceAllUsesOfValueWith(PromOp,
+      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0),
+                  Ops.data(), Ops.size()));
+  }
+
+  // Now we're left with the initial extension itself.
+  if (!ReallyNeedsExt)
+    return N->getOperand(0);
+
+  // To zero extend, just mask off everything except for the first bit (in the
+  // i1 case).
+  if (N->getOpcode() == ISD::ZERO_EXTEND)
+    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+                       DAG.getConstant(APInt::getLowBitsSet(
+                                         N->getValueSizeInBits(0), PromBits),
+                                       N->getValueType(0)));
+
+  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+         "Invalid extension type");
+  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+  SDValue ShiftCst =
+    DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+  return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
+                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
+                                 N->getOperand(0), ShiftCst), ShiftCst);
+}
+
 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   const TargetMachine &TM = getTargetMachine();
   SelectionDAG &DAG = DCI.DAG;
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   switch (N->getOpcode()) {
   default: break;
   case PPCISD::SHL:
@@ -6837,6 +7873,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       return N->getOperand(0);
     }
     break;
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    return DAGCombineExtBoolTrunc(N, DCI);
+  case ISD::TRUNCATE:
+  case ISD::SETCC:
+  case ISD::SELECT_CC:
+    return DAGCombineTruncBoolExt(N, DCI);
   case ISD::FDIV: {
     assert(TM.Options.UnsafeFPMath &&
            "Reciprocal estimates require UnsafeFPMath");
@@ -6844,7 +7888,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
       SDValue RV =
         DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
-      if (RV.getNode() != 0) {
+      if (RV.getNode()) {
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                           N->getOperand(0), RV);
@@ -6854,9 +7898,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       SDValue RV =
         DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
                                  DCI);
-      if (RV.getNode() != 0) {
+      if (RV.getNode()) {
        DCI.AddToWorklist(RV.getNode());
-       RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+       RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
                         N->getValueType(0), RV);
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
@@ -6867,9 +7911,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       SDValue RV =
        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
                                 DCI);
-      if (RV.getNode() != 0) {
+      if (RV.getNode()) {
        DCI.AddToWorklist(RV.getNode());
-       RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+       RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
                         N->getValueType(0), RV,
                         N->getOperand(1).getOperand(1));
        DCI.AddToWorklist(RV.getNode());
@@ -6879,7 +7923,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     }
 
     SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
-    if (RV.getNode() != 0) {
+    if (RV.getNode()) {
       DCI.AddToWorklist(RV.getNode());
       return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                          N->getOperand(0), RV);
@@ -6894,11 +7938,31 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
     // reciprocal sqrt.
     SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
-    if (RV.getNode() != 0) {
+    if (RV.getNode()) {
       DCI.AddToWorklist(RV.getNode());
       RV = DAGCombineFastRecip(RV, DCI);
-      if (RV.getNode() != 0)
+      if (RV.getNode()) {
+       // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+       // this case and force the answer to 0.
+
+       EVT VT = RV.getValueType();
+
+       SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+       if (VT.isVector()) {
+         assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+         Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+       }
+
+       SDValue ZeroCmp =
+         DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+                      N->getOperand(0), Zero, ISD::SETEQ);
+       DCI.AddToWorklist(ZeroCmp.getNode());
+       DCI.AddToWorklist(RV.getNode());
+
+       RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+                        ZeroCmp, Zero, RV);
        return RV;
+      }
     }
   }
 
@@ -6986,6 +8050,160 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                         cast<StoreSDNode>(N)->getMemoryVT(),
                         cast<StoreSDNode>(N)->getMemOperand());
     }
+    break;
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    EVT VT = LD->getValueType(0);
+    Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+    unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+    if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
+        TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+        (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+         VT == MVT::v4i32 || VT == MVT::v4f32) &&
+        LD->getAlignment() < ABIAlignment) {
+      // This is a type-legal unaligned Altivec load.
+      SDValue Chain = LD->getChain();
+      SDValue Ptr = LD->getBasePtr();
+
+      // This implements the loading of unaligned vectors as described in
+      // the venerable Apple Velocity Engine overview. Specifically:
+      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
+      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
+      //
+      // The general idea is to expand a sequence of one or more unaligned
+      // loads into an alignment-based permutation-control instruction (lvsl),
+      // a series of regular vector loads (which always truncate their
+      // input address to an aligned address), and a series of permutations.
+      // The results of these permutations are the requested loaded values.
+      // The trick is that the last "extra" load is not taken from the address
+      // you might suspect (sizeof(vector) bytes after the last requested
+      // load), but rather sizeof(vector) - 1 bytes after the last
+      // requested vector. The point of this is to avoid a page fault if the
+      // base address happened to be aligned. This works because if the base
+      // address is aligned, then adding less than a full vector length will
+      // cause the last vector in the sequence to be (re)loaded. Otherwise,
+      // the next vector will be fetched as you might suspect was necessary.
+
+      // We might be able to reuse the permutation generation from
+      // a different base address offset from this one by an aligned amount.
+      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
+      // optimization later.
+      SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
+                                          DAG, dl, MVT::v16i8);
+
+      // Refine the alignment of the original load (a "new" load created here
+      // which was identical to the first except for the alignment would be
+      // merged with the existing node regardless).
+      MachineFunction &MF = DAG.getMachineFunction();
+      MachineMemOperand *MMO =
+        MF.getMachineMemOperand(LD->getPointerInfo(),
+                                LD->getMemOperand()->getFlags(),
+                                LD->getMemoryVT().getStoreSize(),
+                                ABIAlignment);
+      LD->refineAlignment(MMO);
+      SDValue BaseLoad = SDValue(LD, 0);
+
+      // Note that the value of IncOffset (which is provided to the next
+      // load's pointer info offset value, and thus used to calculate the
+      // alignment), and the value of IncValue (which is actually used to
+      // increment the pointer value) are different! This is because we
+      // require the next load to appear to be aligned, even though it
+      // is actually offset from the base pointer by a lesser amount.
+      int IncOffset = VT.getSizeInBits() / 8;
+      int IncValue = IncOffset;
+
+      // Walk (both up and down) the chain looking for another load at the real
+      // (aligned) offset (the alignment of the other load does not matter in
+      // this case). If found, then do not use the offset reduction trick, as
+      // that will prevent the loads from being later combined (as they would
+      // otherwise be duplicates).
+      if (!findConsecutiveLoad(LD, DAG))
+        --IncValue;
+
+      SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
+      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+
+      SDValue ExtraLoad =
+        DAG.getLoad(VT, dl, Chain, Ptr,
+                    LD->getPointerInfo().getWithOffset(IncOffset),
+                    LD->isVolatile(), LD->isNonTemporal(),
+                    LD->isInvariant(), ABIAlignment);
+
+      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                               BaseLoad.getValue(1), ExtraLoad.getValue(1));
+
+      if (BaseLoad.getValueType() != MVT::v4i32)
+        BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
+
+      if (ExtraLoad.getValueType() != MVT::v4i32)
+        ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
+
+      SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+                                      BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+
+      if (VT != MVT::v4i32)
+        Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+
+      // Now we need to be really careful about how we update the users of the
+      // original load. We cannot just call DCI.CombineTo (or
+      // DAG.ReplaceAllUsesWith for that matter), because the load still has
+      // uses created here (the permutation for example) that need to stay.
+      SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+      while (UI != UE) {
+        SDUse &Use = UI.getUse();
+        SDNode *User = *UI;
+        // Note: BaseLoad is checked here because it might not be N, but a
+        // bitcast of N.
+        if (User == Perm.getNode() || User == BaseLoad.getNode() ||
+            User == TF.getNode() || Use.getResNo() > 1) {
+          ++UI;
+          continue;
+        }
+
+        SDValue To = Use.getResNo() ? TF : Perm;
+        ++UI;
+
+        SmallVector<SDValue, 8> Ops;
+        for (SDNode::op_iterator O = User->op_begin(),
+             OE = User->op_end(); O != OE; ++O) {
+          if (*O == Use)
+            Ops.push_back(To);
+          else
+            Ops.push_back(*O);
+        }
+
+        DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
+      }
+
+      return SDValue(N, 0);
+    }
+    }
+    break;
+  case ISD::INTRINSIC_WO_CHAIN:
+    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
+          Intrinsic::ppc_altivec_lvsl &&
+        N->getOperand(1)->getOpcode() == ISD::ADD) {
+      SDValue Add = N->getOperand(1);
+
+      if (DAG.MaskedValueIsZero(Add->getOperand(1),
+            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
+              Add.getValueType().getScalarType().getSizeInBits()))) {
+        SDNode *BasePtr = Add->getOperand(0).getNode();
+        for (SDNode::use_iterator UI = BasePtr->use_begin(),
+             UE = BasePtr->use_end(); UI != UE; ++UI) {
+          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+                Intrinsic::ppc_altivec_lvsl) {
+            // We've found another LVSL, and this address is an aligned
+            // multiple of that one. The results will be the same, so use the
+            // one we've just found instead.
+
+            return SDValue(*UI, 0);
+          }
+        }
+      }
+    }
+    break;
   case ISD::BSWAP:
     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
@@ -7037,7 +8255,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         !N->getOperand(2).hasOneUse()) {
 
       // Scan all of the users of the LHS, looking for VCMPo's that match.
-      SDNode *VCMPoNode = 0;
+      SDNode *VCMPoNode = nullptr;
 
       SDNode *LHSN = N->getOperand(0).getNode();
       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
@@ -7058,9 +8276,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       // Look at the (necessarily single) use of the flag value.  If it has a
       // chain, this transformation is more complex.  Note that multiple things
       // could use the value result, which we should ignore.
-      SDNode *FlagUser = 0;
+      SDNode *FlagUser = nullptr;
       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
-           FlagUser == 0; ++UI) {
+           FlagUser == nullptr; ++UI) {
         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
         SDNode *User = *UI;
         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
@@ -7071,20 +8289,72 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         }
       }
 
-      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
-      // give up for right now.
-      if (FlagUser->getOpcode() == PPCISD::MFCR)
+      // If the user is a MFOCRF instruction, we know this is safe.
+      // Otherwise we give up for right now.
+      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
         return SDValue(VCMPoNode, 0);
     }
     break;
   }
+  case ISD::BRCOND: {
+    SDValue Cond = N->getOperand(1);
+    SDValue Target = N->getOperand(2);
+
+    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+      // We now need to make the intrinsic dead (it cannot be instruction
+      // selected).
+      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+      assert(Cond.getNode()->hasOneUse() &&
+             "Counter decrement has more than one use");
+
+      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+                         N->getOperand(0), Target);
+    }
+  }
+  break;
   case ISD::BR_CC: {
     // If this is a branch on an altivec predicate comparison, lower this so
-    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
+    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
     // lowering is done pre-legalize, because the legalizer lowers the predicate
     // compare down to code that is difficult to reassemble.
     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+
+    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
+    // value. If so, pass-through the AND to get to the intrinsic.
+    if (LHS.getOpcode() == ISD::AND &&
+        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+        isa<ConstantSDNode>(LHS.getOperand(1)) &&
+        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
+          isZero())
+      LHS = LHS.getOperand(0);
+
+    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+        isa<ConstantSDNode>(RHS)) {
+      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+             "Counter decrement comparison is not EQ or NE");
+
+      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
+                    (CC == ISD::SETNE && !Val);
+
+      // We now need to make the intrinsic dead (it cannot be instruction
+      // selected).
+      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
+      assert(LHS.getNode()->hasOneUse() &&
+             "Counter decrement has more than one use");
+
+      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
+                         N->getOperand(0), N->getOperand(4));
+    }
+
     int CompareOpc;
     bool isDot;
@@ -7209,6 +8479,11 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
       // suboptimal.
       return C_Memory;
     }
+  } else if (Constraint == "wc") { // individual CR bits.
+    return C_RegisterClass;
+  } else if (Constraint == "wa" || Constraint == "wd" ||
+             Constraint == "wf" || Constraint == "ws") {
+    return C_RegisterClass; // VSX registers.
   }
   return TargetLowering::getConstraintType(Constraint);
 }
@@ -7223,10 +8498,21 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
   Value *CallOperandVal = info.CallOperandVal;
   // If we don't have a value, we can't do a match,
   // but allow it at the lowest weight.
-  if (CallOperandVal == NULL)
+  if (!CallOperandVal)
     return CW_Default;
   Type *type = CallOperandVal->getType();
+
+  // Look at the constraint type.
+  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+    return CW_Register; // an individual CR bit.
+  else if ((StringRef(constraint) == "wa" ||
+            StringRef(constraint) == "wd" ||
+            StringRef(constraint) == "wf") &&
+           type->isVectorTy())
+    return CW_Register;
+  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+    return CW_Register;
+
   switch (*constraint) {
   default:
     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
@@ -7259,7 +8545,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
 
 std::pair<unsigned, const TargetRegisterClass*>
 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                EVT VT) const {
+                                                MVT VT) const {
   if (Constraint.size() == 1) {
     // GCC RS6000 Constraint Letters
     switch (Constraint[0]) {
@@ -7282,9 +8568,33 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
     case 'y':   // crrc
       return std::make_pair(0U, &PPC::CRRCRegClass);
     }
-  }
-
-  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+  } else if (Constraint == "wc") { // an individual CR bit.
+    return std::make_pair(0U, &PPC::CRBITRCRegClass);
+  } else if (Constraint == "wa" || Constraint == "wd" ||
+             Constraint == "wf") {
+    return std::make_pair(0U, &PPC::VSRCRegClass);
+  } else if (Constraint == "ws") {
+    return std::make_pair(0U, &PPC::VSFRCRegClass);
+  }
+
+  std::pair<unsigned, const TargetRegisterClass*> R =
+    TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+  // register.
+  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+  if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+      PPC::GPRCRegClass.contains(R.first)) {
+    const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+    return std::make_pair(TRI->getMatchingSuperReg(R.first,
+                            PPC::sub_32, &PPC::G8RCRegClass),
+                          &PPC::G8RCRegClass);
+  }
+
+  return R;
 }
 
 
@@ -7294,7 +8604,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                      std::string &Constraint,
                                                      std::vector<SDValue>&Ops,
                                                      SelectionDAG &DAG) const {
-  SDValue Result(0,0);
+  SDValue Result;
 
   // Only support length 1 constraints.
   if (Constraint.length() > 1) return;
@@ -7394,25 +8704,16 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   return true;
 }
 
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode for load / store of the
-/// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
-  // PPC allows a sign-extended 16-bit immediate field.
-  return (V > -(1 << 16) && V < (1 << 16)-1);
-}
-
-bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
-  return false;
-}
-
 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MFI->setReturnAddressIsTaken(true);
 
-  DebugLoc dl = Op.getDebugLoc();
+  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+    return SDValue();
+
+  SDLoc dl(Op);
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 
   // Make sure the function does not optimize away the store of the RA to
@@ -7442,7 +8743,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
 
 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -7499,7 +8800,44 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
   }
 }
 
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+                                                          Type *Ty) const {
+  assert(Ty->isIntegerTy());
+
+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  if (BitSize == 0 || BitSize > 64)
+    return false;
+  return true;
+}
+
+bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+    return false;
+  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+  return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+  if (!VT1.isInteger() || !VT2.isInteger())
+    return false;
+  unsigned NumBits1 = VT1.getSizeInBits();
+  unsigned NumBits2 = VT2.getSizeInBits();
+  return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+  return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
 bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                      unsigned,
                                                       bool *Fast) const {
   if (DisablePPCUnaligned)
     return false;
@@ -7513,8 +8851,14 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
   if (!VT.isSimple())
     return false;
 
-  if (VT.getSimpleVT().isVector())
-    return false;
+  if (VT.getSimpleVT().isVector()) {
+    if (PPCSubTarget.hasVSX()) {
+      if (VT != MVT::v2f64 && VT != MVT::v2i64)
+        return false;
+    } else {
+      return false;
+    }
+  }
 
   if (VT == MVT::ppcf128)
     return false;
@@ -7525,18 +8869,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
   return true;
 }
 
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
   if (!VT.isSimple())
     return false;
 
   switch (VT.getSimpleVT().SimpleTy) {
   case MVT::f32:
   case MVT::f64:
-  case MVT::v4f32:
     return true;
   default:
     break;
@@ -7545,10 +8886,25 @@ bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
   return false;
 }
 
+bool
+PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
+                        EVT VT, unsigned DefinedValues) const {
+  if (VT == MVT::v2i64)
+    return false;
+
+  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
-  if (DisableILPPref)
+  if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
     return TargetLowering::getSchedulingPreference(N);
 
   return Sched::ILP;
 }
+
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+                                  const TargetLibraryInfo *LibInfo) const {
+  return PPC::createFastISel(FuncInfo, LibInfo);
+}
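Editor's note: the ISD::LOAD combine added above expands a misaligned Altivec load into lvsl + two aligned loads + vperm at the SelectionDAG level. As a rough source-level illustration only (not part of the patch), the equivalent sequence written with the standard AltiVec intrinsics from altivec.h looks like the sketch below; the function name and the single 16-byte load are assumptions made for the example.

  #include <altivec.h>

  // Load 16 bytes from a possibly misaligned pointer p.
  static vector unsigned char load_unaligned_16(const unsigned char *p) {
    // vec_ld always truncates its effective address to a 16-byte boundary,
    // so these two loads fetch the aligned blocks that straddle p.
    vector unsigned char hi = vec_ld(0, p);
    // Load the second block at +15 rather than +16: if p happened to be
    // aligned already, this re-reads the same block instead of touching
    // (and possibly faulting on) the next page -- the sizeof(vector)-1
    // trick described in the comment above.
    vector unsigned char lo = vec_ld(15, p);
    // Permute control derived from the low four bits of the address.
    vector unsigned char mask = vec_lvsl(0, p);
    // Splice the two aligned halves into the requested unaligned value.
    return vec_perm(hi, lo, mask);
  }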