static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+// FIXME: Remove this once the bug has been fixed!
+extern cl::opt<bool> ANDIGlueBug;
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+ if (Subtarget->useCRBits()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (isPPC64 || Subtarget->hasFPCVT()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
+ }
+
+ // PowerPC does not support direct load / store of condition registers
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ if (ANDIGlueBug)
+ setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+ addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
+ }
+
// This is used in the ppcf128->int sequence. Note it has different semantics
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFCPSGN()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ } else {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ }
if (Subtarget->hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-
- // frin does not implement "ties to even." Thus, this is safe only in
- // fast-math mode.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-
- // These need to set FE_INEXACT, and use a custom inserter.
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- }
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, CTPOP or CTTZ
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
- // PowerPC does not have Select
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ if (!Subtarget->useCRBits()) {
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ }
// PowerPC wants to turn select_cc of FP into fsel when possible.
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ if (!Subtarget->useCRBits())
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ if (!Subtarget->useCRBits())
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
-
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, and etc.. As a result, no
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ if (Subtarget->isSVR4ABI() && !isPPC64)
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+
// Use the default implementation.
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
- setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i32,
+ Subtarget->useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath) {
+ if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+ if (Subtarget->hasVSX()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+
+ // Share the Altivec comparison restrictions.
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STORE, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+
+ addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
+
+ addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
+ setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+
+ // Vector operation legalization checks the result type of
+ // SIGN_EXTEND_INREG, overall legalization checks the inner type.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+
+ addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
+ }
}
if (Subtarget->has64BitSupport()) {
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
+ if (Subtarget->useCRBits())
+ setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ if (Subtarget->useCRBits()) {
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ }
+
// Use reciprocal estimates.
if (TM.Options.UnsafeFPMath) {
setTargetDAGCombine(ISD::FDIV);
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
}
+ // With 32 condition bits, we don't need to sink (and duplicate) compares
+ // aggressively in CodeGenPrep.
+ if (Subtarget->useCRBits())
+ setHasMultipleConditionRegisters();
+
setMinFunctionAlignment(2);
if (PPCSubTarget.isDarwin())
setPrefFunctionAlignment(4);
setInsertFencesForAtomic(true);
- setSchedulingPreference(Sched::Hybrid);
+ if (Subtarget->enableMachineScheduler())
+ setSchedulingPreference(Sched::Source);
+ else
+ setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
}
}
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+ unsigned MaxMaxAlign) {
+ if (MaxAlign == MaxMaxAlign)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+ MaxAlign = 32;
+ else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == MaxMaxAlign)
+ break;
+ }
+ }
+}
+
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
- const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ if (PPCSubTarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- if (VTy->getBitWidth() >= 128)
- return 16;
-
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- if (PPCSubTarget.isPPC64())
- return 8;
-
- return 4;
+ unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+ if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+ getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ return Align;
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
- case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
- return MVT::i32;
+ return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- "PPC only supports shuffles by bytes!");
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- "PPC only supports shuffles by bytes!");
+ if (N->getValueType(0) != MVT::v16i8)
+ return -1;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
- SDValue OpVal(0, 0);
+ SDValue OpVal(nullptr, 0);
// If ByteSize of the splat is bigger than the element size of the
// build_vector, then we have a case where we are checking for a splat where
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
- if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
return SDValue(); // no match.
bool LeadingZero = true;
bool LeadingOnes = true;
for (unsigned i = 0; i != Multiple-1; ++i) {
- if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+ if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
}
// Finally, check the least significant entry.
if (LeadingZero) {
- if (UniquedVals[Multiple-1].getNode() == 0)
+ if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
if (Val < 16)
return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
}
if (LeadingOnes) {
- if (UniquedVals[Multiple-1].getNode() == 0)
+ if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
// Check to see if this buildvec has a single non-undef value in its elements.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
+ if (!OpVal.getNode())
OpVal = N->getOperand(i);
else if (OpVal != N->getOperand(i))
return SDValue();
}
- if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+ if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
unsigned ValSizeInBytes = EltSize;
uint64_t Value = 0;
return false;
}
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+ // FIXME: This does not handle the LWA case.
+ if (VT != MVT::i64)
+ return;
+
+ // NOTE: We'll exclude negative FIs here, which come from argument
+ // lowering, because there are no known test cases triggering this problem
+ // using packed structures (or similar). We can remove this exclusion if
+ // we find such a test case. The reason why this is so test-case driven is
+ // because this entire 'fixup' is only to prevent crashes (from the
+ // register scavenger) on not-really-valid inputs. For example, if we have:
+ // %a = alloca i1
+ // %b = bitcast i1* %a to i64*
+ // store i64* a, i64 b
+ // then the store should really be marked as 'align 1', but is not. If it
+ // were marked as 'align 1' then the indexed form would have been
+ // instruction-selected initially, and the problem this 'fixup' is preventing
+ // won't happen regardless.
+ if (FrameIdx < 0)
+ return;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ if (Align >= 4)
+ return;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasNonRISpills();
+}
+
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
Disp = DAG.getTargetConstant(imm, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
}
Disp = DAG.getTargetConstant(0, getPointerTy());
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- else
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else
Base = N;
return true; // [r+0]
}
/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
- unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ unsigned &LoOpFlags,
+ const GlobalValue *GV = nullptr) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
+ // FIXME: TLS addresses currently use medium model code sequences,
+ // which is the most useful form. Eventually support for small and
+ // large models could be added if users need it, at the cost of
+ // additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
- if (!is64bit)
- llvm_unreachable("only local-exec is currently supported for ppc32");
-
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
- PtrVT, GOTReg, TGA);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLS);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ } else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
- PtrVT, TGA, TPOffsetHi);
- return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ PtrVT, TGA, GOTPtr);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
+ if (Op.getValueType() == MVT::v2i64) {
+ // When the operands themselves are v2i64 values, we need to do something
+ // special because VSX has no underlying comparison operations for these.
+ if (Op.getOperand(0).getValueType() == MVT::v2i64) {
+ // Equality can be handled by casting to the legal type for Altivec
+ // comparisons, everything else needs to be expanded.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getSetCC(dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
+ CC));
+ }
+
+ return SDValue();
+ }
+
+ // We handle most of these in the usual way.
+ return Op;
+ }
+
// If we're comparing for equality to zero, expose the fact that this is
// implented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
false, false, false, 0);
}
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+ // We have to copy the entire va_list struct:
+ // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
+ return DAG.getMemcpy(Op.getOperand(0), Op,
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, MVT::i32), 8, false, true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
return Op.getOperand(0);
#include "PPCGenCallingConv.inc"
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+ return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const uint16_t *GetFPR() {
- static const uint16_t FPR[] = {
+static const MCPhysReg *GetFPR() {
+ static const MCPhysReg FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
- unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ unsigned ArgSize = ArgVT.getStoreSize();
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
switch (ValVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i1:
case MVT::i32:
RC = &PPC::GPRCRegClass;
break;
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- RC = &PPC::F8RCRegClass;
+ if (PPCSubTarget.hasVSX())
+ RC = &PPC::VSFRCRegClass;
+ else
+ RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ RC = &PPC::VSHRCRegClass;
+ break;
}
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ ValVT == MVT::i1 ? MVT::i32 : ValVT);
+
+ if (ValVT == MVT::i1)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
InVals.push_back(ArgValue);
} else {
// Argument stored in memory.
assert(VA.isMemLoc());
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = VA.getLocVT().getStoreSize();
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
isImmutable);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const uint16_t GPArgRegs[] = {
+ static const MCPhysReg GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const uint16_t FPArgRegs[] = {
+ static const MCPhysReg FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
// Set the size that is at least reserved in caller of this function. Tail
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const uint16_t GPR[] = {
+ static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
+ ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) {
if (isVarArg) {
MinReservedArea = ((MinReservedArea+15)/16)*16;
MinReservedArea += CalculateStackSlotSize(ObjectVT,
InVals.push_back(FIN);
continue;
}
+
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ CurArgOffset = ArgOffset;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (ObjSize < PtrByteSize)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, CurArgOffset),
+ MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg),
false, false, 0);
}
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg, j),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32)
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ?
+ &PPC::VSFRCRegClass :
+ &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
// Note that vector arguments in registers don't reserve stack space,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
+ MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
+ MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
if (isVarArg) {
while ((ArgOffset % 16) != 0) {
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const uint16_t GPR_32[] = { // 32-bit registers.
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
switch(ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
case MVT::f32:
VecArgOffset += 4;
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg,
- CurArgOffset),
+ MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg, j),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
if (!isPPC64) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+
+ if (ObjectVT == MVT::i1)
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
+
++GPR_idx;
} else {
needsLoad = true;
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32)
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
+ ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) {
if (!isVarArg && !isPPC64) {
// Non-varargs Altivec parameters go after all the non-Altivec
// parameters; handle those later so we know how much padding we need.
if (Flags.isByVal()) return false;
}
- // Non PIC/GOT tail calls are supported.
+ // Non-PIC/GOT tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return true;
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C) return 0;
+ if (!C) return nullptr;
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
SignExtend32<26>(Addr) != Addr)
- return 0; // Top 6 bits have to be sext of immediate.
+ return nullptr; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
DAG.getTargetLoweringInfo().getPointerTy()).getNode();
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
SDValue Chain,
- const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
- SmallVector<SDValue, 8> &MemOpChains,
+ const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+ SmallVectorImpl<SDValue> &MemOpChains,
SDLoc dl) {
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
SDValue Arg = TailCallArgs[i].Arg;
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
- SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(0),
- MachinePointerInfo(0));
+ false, false, MachinePointerInfo(),
+ MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
SDValue Arg, SDValue PtrOff, int SPDiff,
unsigned ArgOffset, bool isPPC64, bool isTailCall,
- bool isVector, SmallVector<SDValue, 8> &MemOpChains,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
+ bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
SDLoc dl) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
if (!isTailCall) {
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
SDValue LROp, SDValue FPOp, bool isDarwinABI,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
MachineFunction &MF = DAG.getMachineFunction();
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
if (!MemOpChains2.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains2[0], MemOpChains2.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
- SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &PPCSubTarget) {
bool isPPC64 = PPCSubTarget.isPPC64();
// Load the address of the function entry point from the function
// descriptor.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
- SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
- InFlag.getNode() ? 3 : 2);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
+ ArrayRef<SDValue>(MTCTROps, InFlag.getNode() ? 3 : 2));
Chain = LoadFuncPtr.getValue(1);
InFlag = LoadFuncPtr.getValue(2);
MTCTROps[2] = InFlag;
}
- Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
- 2 + (InFlag.getNode() != 0));
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+ ArrayRef<SDValue>(MTCTROps, InFlag.getNode() ? 3 : 2));
InFlag = Chain.getValue(1);
NodeTys.clear();
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
CallOpc = PPCISD::BCTRL;
- Callee.setNode(0);
+ Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
if (isSVR4ABI && isPPC64)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
isa<ConstantSDNode>(Callee)) &&
"Expecting an global address, external symbol, absolute value or register");
- return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
}
// Add a NOP immediately after the branch instruction when using the 64-bit
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) &&
+ (!isLocalCall(Callee) ||
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
}
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
if (needsTOCRestore) {
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- SDLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
if (PPCSubTarget.isSVR4ABI()) {
if (PPCSubTarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
errs() << "Call operand #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
} else {
}
if (VA.isRegLoc()) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue Ops[] = { Chain, InFlag };
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
- dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+ dl, VTs,
+ ArrayRef<SDValue>(Ops, InFlag.getNode() ? 2 : 1));
InFlag = Chain.getValue(1);
}
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR[] = {
+ static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
// Promote integers to 64-bit values.
- if (Arg.getValueType() == MVT::i32) {
+ if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
if (Size == 0)
continue;
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
if (isVarArg) {
// These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
// stack space allocated at the end.
if (VR_idx != NumVRs) {
// Doesn't have GPR space allocated.
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR_32[] = { // 32-bit registers.
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
+
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
- &RetOps[0], RetOps.size());
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
// Build a DYNALLOC node.
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
- return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
}
+SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 loads");
+
+ // First, load 8 bits into 32 bits, then truncate to 1 bit.
+
+ SDLoc dl(Op);
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand *MMO = LD->getMemOperand();
+
+ SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
+ BasePtr, MVT::i8, MMO);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+
+ SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(1).getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 stores");
+
+ // First, zero extend to 32 bits, then use a truncating store to 8 bits.
+
+ SDLoc dl(Op);
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ MachineMemOperand *MMO = ST->getMemOperand();
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+ return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
+
+// FIXME: Remove this once the ANDI glue bug is fixed:
+SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 results");
+
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
+ Op.getOperand(0));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
SDValue Tmp;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
- MVT::i32, MMO);
+ DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
} else
Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
MPI, false, false, 0);
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstantFP(1.0, Op.getValueType()),
+ DAG.getConstantFP(0.0, Op.getValueType()));
+
assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
- Ops, 2, MVT::i32, MMO);
+ Ops, MVT::i32, MMO);
} else {
assert(PPCSubTarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue MFFSreg, InFlag;
// Save FP Control Word to register
EVT NodeTys[] = {
MVT::f64, // return register
MVT::Glue // unused in this context
};
- SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, ArrayRef<SDValue>());
// Save FP register to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
//===----------------------------------------------------------------------===//
SDValue Elt = DAG.getConstant(Val, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
- SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
- &Ops[0], Ops.size());
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
SelectionDAG &DAG) const {
SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
+ ResultMask);
return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
}
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
- SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32),
CompNode.getValue(1));
return Flags;
}
+SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
+ // instructions), but for smaller types, we need to first extend up to v2i32
+ // before doing going farther.
+ if (Op.getValueType() == MVT::v2i64) {
+ EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (ExtVT != MVT::v2i32) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
+ DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
+ ExtVT.getVectorElementType(), 4)));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
+ DAG.getValueType(MVT::v2i32));
+ }
+
+ return Op;
+ }
+
+ return SDValue();
+}
+
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
case ISD::VAARG:
return LowerVAARG(Op, DAG, PPCSubTarget);
+ case ISD::VACOPY:
+ return LowerVACOPY(Op, DAG, PPCSubTarget);
+
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
// For counter-based loop handling.
return;
}
case ISD::FP_TO_SINT:
+ // LowerFP_TO_INT() can only handle f32 and f64.
+ if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
}
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// Note that the structure of the jmp_buf used here is not compatible
// thisMBB:
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
// Prepare IP either in reg.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
.addReg(PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
-
MIB.setMemRefs(MMOBegin, MMOEnd);
}
+ // Naked functions never have a base pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned BaseReg;
+ if (MF->getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ else
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
const PPCRegisterInfo *TRI =
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
unsigned BufReg = MI->getOperand(0).getReg();
}
MIB.setMemRefs(MMOBegin, MMOEnd);
- // FIXME: When we also support base pointers, that register must also be
- // restored here.
+ // Reload BP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
MachineFunction *F = BB->getParent();
if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
- Cond.push_back(MI->getOperand(4));
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)
+ Cond.push_back(MI->getOperand(4));
+ else
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) {
-
-
+ MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
- BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ if (MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
+ BuildMI(BB, dl, TII->get(PPC::BC))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ } else {
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ }
// copy0MBB:
// %FalseValue = ...
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
- } else if (MI->getOpcode() == PPC::FRINDrint ||
- MI->getOpcode() == PPC::FRINSrint) {
- bool isf32 = MI->getOpcode() == PPC::FRINSrint;
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
- DebugLoc dl = MI->getDebugLoc();
+ } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
+ unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
+ PPC::ANDIo8 : PPC::ANDIo;
+ bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
-
- // Perform the rounding.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
- .addReg(Src);
-
- // Compare the results.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
- .addReg(Dest).addReg(Src);
-
- // If the results were not equal, then set the FPSCR XX bit.
- MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, midMBB);
- F->insert(It, exitMBB);
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+ unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
+ &PPC::GPRCRegClass :
+ &PPC::G8RCRegClass);
- BB->addSuccessor(midMBB);
- BB->addSuccessor(exitMBB);
-
- BB = midMBB;
-
- // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
- // the FI bit here because that will not automatically set XX also,
- // and XX is what libm interprets as the FE_INEXACT flag.
- BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
- BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
-
- BB->addSuccessor(exitMBB);
-
- BB = exitMBB;
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ .addReg(MI->getOperand(1).getReg()).addImm(1);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const GlobalValue *GV1 = NULL;
- const GlobalValue *GV2 = NULL;
+ const GlobalValue *GV1 = nullptr;
+ const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
return false;
}
+SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ assert(PPCSubTarget.useCRBits() &&
+ "Expecting to be tracking CR bits");
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // trunc(binary-ops(zext(x), zext(y)))
+ // or
+ // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
+ // such that we're unnecessarily moving things into GPRs when it would be
+ // better to keep them in CR bits.
+
+ // Note that trunc here can be an actual i1 trunc, or can be the effective
+ // truncation that comes from a setcc or select_cc.
+ if (N->getOpcode() == ISD::TRUNCATE &&
+ N->getValueType(0) != MVT::i1)
+ return SDValue();
+
+ if (N->getOperand(0).getValueType() != MVT::i32 &&
+ N->getOperand(0).getValueType() != MVT::i64)
+ return SDValue();
+
+ if (N->getOpcode() == ISD::SETCC ||
+ N->getOpcode() == ISD::SELECT_CC) {
+ // If we're looking at a comparison, then we need to make sure that the
+ // high bits (all except for the first) don't matter the result.
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(N->getOperand(
+ N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
+ unsigned OpBits = N->getOperand(0).getValueSizeInBits();
+
+ if (ISD::isSignedIntSetCC(CC)) {
+ if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
+ DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
+ return SDValue();
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (!DAG.MaskedValueIsZero(N->getOperand(0),
+ APInt::getHighBitsSet(OpBits, OpBits-1)) ||
+ !DAG.MaskedValueIsZero(N->getOperand(1),
+ APInt::getHighBitsSet(OpBits, OpBits-1)))
+ return SDValue();
+ } else {
+ // This is neither a signed nor an unsigned comparison, just make sure
+ // that the high bits are equal.
+ APInt Op1Zero, Op1One;
+ APInt Op2Zero, Op2One;
+ DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One);
+ DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One);
+
+ // We don't really care about what is known about the first bit (if
+ // anything), so clear it in all masks prior to comparing them.
+ Op1Zero.clearBit(0); Op1One.clearBit(0);
+ Op2Zero.clearBit(0); Op2One.clearBit(0);
+
+ if (Op1Zero != Op2Zero || Op1One != Op2One)
+ return SDValue();
+ }
+ }
+
+ // We now know that the higher-order bits are irrelevant, we just need to
+ // make sure that all of the intermediate operations are bit operations, and
+ // all inputs are extensions.
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
+ N->getOperand(1).getOpcode() != ISD::AND &&
+ N->getOperand(1).getOpcode() != ISD::OR &&
+ N->getOperand(1).getOpcode() != ISD::XOR &&
+ N->getOperand(1).getOpcode() != ISD::SELECT &&
+ N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps, PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ for (unsigned i = 0; i < 2; ++i) {
+ if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(N->getOperand(i)))
+ Inputs.push_back(N->getOperand(i));
+ else
+ BinOps.push_back(N->getOperand(i));
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ break;
+ }
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by extensions.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
+ BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not an extension or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ // Replace all inputs with the extension operand.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constants may have users outside the cluster of to-be-promoted nodes,
+ // and so we need to replace those as we do the promotions.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (i1) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first. Any intermediate truncations or
+ // extensions disappear.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ if (PromOp.getOpcode() == ISD::TRUNCATE ||
+ PromOp.getOpcode() == ISD::SIGN_EXTEND ||
+ PromOp.getOpcode() == ISD::ZERO_EXTEND ||
+ PromOp.getOpcode() == ISD::ANY_EXTEND) {
+ if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
+ PromOp.getOperand(0).getValueType() != MVT::i1) {
+ // The operand is not yet ready (see comment below).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SDValue RepValue = PromOp.getOperand(0);
+ if (isa<ConstantSDNode>(RepValue))
+ RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+ continue;
+ }
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != MVT::i1) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If there are any constant inputs, make sure they're replaced now.
+ for (unsigned i = 0; i < 2; ++i)
+ if (isa<ConstantSDNode>(Ops[C+i]))
+ Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
+ }
+
+ // Now we're left with the initial truncation itself.
+ if (N->getOpcode() == ISD::TRUNCATE)
+ return N->getOperand(0);
+
+ // Otherwise, this is a comparison. The operands to be compared have just
+ // changed type (to i1), but everything else is the same.
+ return SDValue(N, 0);
+}
+
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // zext(binary-ops(trunc(x), trunc(y)))
+ // or
+ // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
+ // such that we're unnecessarily moving things into CR bits that can more
+ // efficiently stay in GPRs. Note that if we're not certain that the high
+ // bits are set as required by the final extension, we still may need to do
+ // some masking to get the proper behavior.
+
+ // This same functionality is important on PPC64 when dealing with
+ // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+ // the return values of functions. Because it is so similar, it is handled
+ // here as well.
+
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ if (!((N->getOperand(0).getValueType() == MVT::i1 &&
+ PPCSubTarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 &&
+ PPCSubTarget.isPPC64())))
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by truncations.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not a truncation or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ unsigned PromBits = N->getOperand(0).getValueSizeInBits();
+ bool ReallyNeedsExt = false;
+ if (N->getOpcode() != ISD::ANY_EXTEND) {
+ // If all of the inputs are not already sign/zero extended, then
+ // we'll still need to do that at the end.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ unsigned OpBits =
+ Inputs[i].getOperand(0).getValueSizeInBits();
+ assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+ if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+ !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+ APInt::getHighBitsSet(OpBits,
+ OpBits-PromBits))) ||
+ (N->getOpcode() == ISD::SIGN_EXTEND &&
+ DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+ (OpBits-(PromBits-1)))) {
+ ReallyNeedsExt = true;
+ break;
+ }
+ }
+ }
+
+ // Replace all inputs, either with the truncation operand, or a
+ // truncation or extension to the final output type.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constant inputs need to be replaced with the to-be-promoted nodes that
+ // use them because they might have users outside of the cluster of
+ // promoted nodes.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ SDValue InSrc = Inputs[i].getOperand(0);
+ if (Inputs[i].getValueType() == N->getValueType(0))
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+ else if (N->getOpcode() == ISD::SIGN_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (promoted) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If this node has constant inputs, then they'll need to be promoted here.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (!isa<ConstantSDNode>(Ops[C+i]))
+ continue;
+ if (Ops[C+i].getValueType() == N->getValueType(0))
+ continue;
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else
+ Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ }
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
+ }
+
+ // Now we're left with the initial extension itself.
+ if (!ReallyNeedsExt)
+ return N->getOperand(0);
+
+ // To zero extend, just mask off everything except for the first bit (in the
+ // i1 case).
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+ DAG.getConstant(APInt::getLowBitsSet(
+ N->getValueSizeInBits(0), PromBits),
+ N->getValueType(0)));
+
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ "Invalid extension type");
+ EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+ SDValue ShiftCst =
+ DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+ return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
+ DAG.getNode(ISD::SHL, dl, N->getValueType(0),
+ N->getOperand(0), ShiftCst), ShiftCst);
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
return N->getOperand(0);
}
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ return DAGCombineExtBoolTrunc(N, DCI);
+ case ISD::TRUNCATE:
+ case ISD::SETCC:
+ case ISD::SELECT_CC:
+ return DAGCombineTruncBoolExt(N, DCI);
case ISD::FDIV: {
assert(TM.Options.UnsafeFPMath &&
"Reciprocal estimates require UnsafeFPMath");
if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV);
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV,
}
SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
// Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
// reciprocal sqrt.
SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0)
+ if (RV.getNode()) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
+
+ EVT VT = RV.getValueType();
+
+ SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+ Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+ }
+
+ SDValue ZeroCmp =
+ DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ DCI.AddToWorklist(ZeroCmp.getNode());
+ DCI.AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+ ZeroCmp, Zero, RV);
return RV;
+ }
}
}
};
Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ DAG.getVTList(MVT::Other), Ops,
cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
- Ops, array_lengthof(Ops),
- cast<StoreSDNode>(N)->getMemoryVT(),
+ Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
break;
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
- DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
// you might suspect (sizeof(vector) bytes after the last requested
// load), but rather sizeof(vector) - 1 bytes after the last
// requested vector. The point of this is to avoid a page fault if the
- // base address happend to be aligned. This works because if the base
+ // base address happened to be aligned. This works because if the base
// address is aligned, then adding less than a full vector length will
// cause the last vector in the sequence to be (re)loaded. Otherwise,
// the next vector will be fetched as you might suspect was necessary.
}
}
}
+
+ break;
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
- Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
!N->getOperand(2).hasOneUse()) {
// Scan all of the users of the LHS, looking for VCMPo's that match.
- SDNode *VCMPoNode = 0;
+ SDNode *VCMPoNode = nullptr;
SDNode *LHSN = N->getOperand(0).getNode();
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
// Look at the (necessarily single) use of the flag value. If it has a
// chain, this transformation is more complex. Note that multiple things
// could use the value result, which we should ignore.
- SDNode *FlagUser = 0;
+ SDNode *FlagUser = nullptr;
for (SDNode::use_iterator UI = VCMPoNode->use_begin();
- FlagUser == 0; ++UI) {
+ FlagUser == nullptr; ++UI) {
assert(UI != VCMPoNode->use_end() && "Didn't find user!");
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
}
}
- // If the user is a MFCR instruction, we know this is safe. Otherwise we
- // give up for right now.
- if (FlagUser->getOpcode() == PPCISD::MFCR)
+ // If the user is a MFOCRF instruction, we know this is safe.
+ // Otherwise we give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFOCRF)
return SDValue(VCMPoNode, 0);
}
break;
}
+ case ISD::BRCOND: {
+ SDValue Cond = N->getOperand(1);
+ SDValue Target = N->getOperand(2);
+
+ if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+ assert(Cond.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+ N->getOperand(0), Target);
+ }
+ }
+ break;
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
- // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
// lowering is done pre-legalize, because the legalizer lowers the predicate
// compare down to code that is difficult to reassemble.
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
// suboptimal.
return C_Memory;
}
+ } else if (Constraint == "wc") { // individual CR bits.
+ return C_RegisterClass;
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
}
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
+
// Look at the constraint type.
+ if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+ return CW_Register; // an individual CR bit.
+ else if ((StringRef(constraint) == "wa" ||
+ StringRef(constraint) == "wd" ||
+ StringRef(constraint) == "wf") &&
+ type->isVectorTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
+
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
}
- }
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ } else if (Constraint == "wc") { // an individual CR bit.
+ return std::make_pair(0U, &PPC::CRBITRCRegClass);
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf") {
+ return std::make_pair(0U, &PPC::VSRCRegClass);
+ } else if (Constraint == "ws") {
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
+ }
+
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+ // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+ // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+ // register.
+ // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+ // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+ if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ PPC::GPRCRegClass.contains(R.first)) {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ return std::make_pair(TRI->getMatchingSuperReg(R.first,
+ PPC::sub_32, &PPC::G8RCRegClass),
+ &PPC::G8RCRegClass);
+ }
+
+ return R;
}
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
- SDValue Result(0,0);
+ SDValue Result;
// Only support length 1 constraints.
if (Constraint.length() > 1) return;
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
}
}
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0 || BitSize > 64)
+ return false;
+ return true;
+}
+
+bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned,
bool *Fast) const {
if (DisablePPCUnaligned)
return false;
if (!VT.isSimple())
return false;
- if (VT.getSimpleVT().isVector())
- return false;
+ if (VT.getSimpleVT().isVector()) {
+ if (PPCSubTarget.hasVSX()) {
+ if (VT != MVT::v2f64 && VT != MVT::v2i64)
+ return false;
+ } else {
+ return false;
+ }
+ }
if (VT == MVT::ppcf128)
return false;
return true;
}
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
- case MVT::v4f32:
return true;
default:
break;
return false;
}
+bool
+PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
+ EVT VT , unsigned DefinedValues) const {
+ if (VT == MVT::v2i64)
+ return false;
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref)
+ if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
}
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const {
+ return PPC::createFastISel(FuncInfo, LibInfo);
+}