TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
static const CostTblEntry<MVT::SimpleValueType>
AVX2UniformConstCostTable[] = {
+ { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
+
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
{ ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
{ ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
{ ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
-
- { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
- { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
-
- { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
- { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
- { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
-
- // It is not a good idea to vectorize division. We have to scalarize it and
+\r
+ { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.\r
+ { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.\r
+ { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.\r
+ { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.\r
+\r
+ { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.\r
+ { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.\r
+ { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.\r
+ { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.\r
+\r
+ // It is not a good idea to vectorize division. We have to scalarize it and\r
// in the process we will often end up having to spilling regular
// registers. The overhead of division is going to dominate most kernels
// anyways so try hard to prevent vectorization of division - it is
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
unsigned Cost = 1;
if (LT.second.getSizeInBits() > 128)
Cost = 3; // Extract + insert + copy.
if (Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
- std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
+ std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
SSE2ConvTbl[] = {
if (Idx != -1)
return AVX512ConversionTbl[Idx].Cost;
}
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
// The function getSimpleVT only handles simple value types.
if (!SrcTy.isSimple() || !DstTy.isSimple())
unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
if (Index != -1U) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
unsigned Cost = 0;
- if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() &&
+ if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
bool IsPairwise) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
}
return X86TTIImpl::getIntImmCost(Imm, Ty);
}
-
-bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {
- int DataWidth = DataTy->getPrimitiveSizeInBits();
-
- // Todo: AVX512 allows gather/scatter, works with strided and random as well
- if ((DataWidth < 32) || (Consecutive == 0))
- return false;
- if (ST->hasAVX512() || ST->hasAVX2())
- return true;
- return false;
-}
+\r
+bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {\r
+ int DataWidth = DataTy->getPrimitiveSizeInBits();\r
+ \r
+ // Todo: AVX512 allows gather/scatter, works with strided and random as well\r
+ if ((DataWidth < 32) || (Consecutive == 0))\r
+ return false;\r
+ if (ST->hasAVX512() || ST->hasAVX2()) \r
+ return true;\r
+ return false;\r
+}\r
bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {
return isLegalMaskedLoad(DataType, Consecutive);
}
+bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ // Work this as a subsetting of subtarget features.
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // FIXME: This is likely too limiting as it will include subtarget features
+ // that we might not care about for inlining, but it is conservatively
+ // correct.
+ return (CallerBits & CalleeBits) == CalleeBits;
+}