}
X86TTI(const X86TargetMachine *TM)
- : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeX86TTIPass(*PassRegistry::getPassRegistry());
}
unsigned getNumberOfRegisters(bool Vector) const override;
unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaximumUnrollFactor() const override;
+ unsigned getMaxInterleaveFactor() const override;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind) const override;
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override;
unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const override;
unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
if (Vector && !ST->hasSSE1())
return 0;
- if (ST->is64Bit())
+ if (ST->is64Bit()) {
+ if (Vector && ST->hasAVX512())
+ return 32;
return 16;
+ }
return 8;
}
unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
if (Vector) {
+ if (ST->hasAVX512()) return 512;
if (ST->hasAVX()) return 256;
if (ST->hasSSE1()) return 128;
return 0;
}
-unsigned X86TTI::getMaximumUnrollFactor() const {
+unsigned X86TTI::getMaxInterleaveFactor() const {
if (ST->isAtom())
return 1;
return 2;
}
-unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
+unsigned X86TTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ISD == ISD::SDIV &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+ // On X86, vector signed division by constants power-of-two are
+ // normally expanded to the sequence SRA + SRL + ADD + SRA.
+ // The OperandValue properties many not be same as that of previous
+ // operation;conservatively assume OP_None.
+ unsigned Cost =
+ 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+
+ return Cost;
+ }
+
static const CostTblEntry<MVT::SimpleValueType>
AVX2UniformConstCostTable[] = {
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
return LT.first * AVX2UniformConstCostTable[Idx].Cost;
}
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
+ { ISD::SHL, MVT::v16i32, 1 },
+ { ISD::SRL, MVT::v16i32, 1 },
+ { ISD::SRA, MVT::v16i32, 1 },
+ { ISD::SHL, MVT::v8i64, 1 },
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+ };
+
static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
+ if (ST->hasAVX512()) {
+ int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX512CostTable[Idx].Cost;
+ }
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
}
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ AVX512ConversionTbl[] = {
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
+ { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
+
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
+
+ // v16i1 -> v16i32 - load + broadcast
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+
+ };
+
+ if (ST->hasAVX512()) {
+ int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
+ LTSrc.second);
+ if (Idx != -1)
+ return AVX512ConversionTbl[Idx].Cost;
+ }
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
+
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
};
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
{ ISD::SETCC, MVT::v32i8, 1 },
};
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
+ { ISD::SETCC, MVT::v8i64, 1 },
+ { ISD::SETCC, MVT::v16i32, 1 },
+ { ISD::SETCC, MVT::v8f64, 1 },
+ { ISD::SETCC, MVT::v16f32, 1 },
+ };
+
+ if (ST->hasAVX512()) {
+ int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX512CostTbl[Idx].Cost;
+ }
+
if (ST->hasAVX2()) {
int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
if (Idx != -1)