From e623702c22e7cee4e02332b245a417a88ae6ffff Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Mon, 5 Nov 2012 19:32:46 +0000 Subject: [PATCH] Implement the cost of abnormal x86 instruction lowering as a table. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167395 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetTransformImpl.h | 5 +- lib/Target/TargetTransformImpl.cpp | 18 ++-- lib/Target/X86/X86ISelLowering.cpp | 86 ++++++++----------- test/Analysis/CostModel/X86/arith.ll | 2 + .../LoopVectorize/X86/conversion-cost.ll | 2 +- 5 files changed, 53 insertions(+), 60 deletions(-) diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h index 625be7208ad..d5ab3728afd 100644 --- a/include/llvm/Target/TargetTransformImpl.h +++ b/include/llvm/Target/TargetTransformImpl.h @@ -55,13 +55,16 @@ protected: const TargetLowering *TLI; /// Estimate the cost of type-legalization and the legalized type. - std::pair<unsigned, EVT> + std::pair<unsigned, MVT> getTypeLegalizationCost(LLVMContext &C, EVT Ty) const; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + // Get the ISD node that corresponds to the Instruction class opcode. + int InstructionOpcodeToISD(unsigned Opcode) const; + public: explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {} diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index 4cd07cd6c37..a9f02edaae4 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -60,7 +60,7 @@ bool ScalarTargetTransformImpl::shouldBuildLookupTables() const { // Calls used by the vectorizers. 
// //===----------------------------------------------------------------------===// -static int InstructionOpcodeToISD(unsigned Opcode) { +int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const { enum InstructionOpcodes { #define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, #define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM @@ -130,7 +130,7 @@ static int InstructionOpcodeToISD(unsigned Opcode) { llvm_unreachable("Unknown instruction type encountered!"); } -std::pair<unsigned, EVT> +std::pair<unsigned, MVT> VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, EVT Ty) const { unsigned Cost = 1; @@ -141,7 +141,7 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty); if (LK.first == TargetLowering::TypeLegal) - return std::make_pair(Cost, Ty); + return std::make_pair(Cost, Ty.getSimpleVT()); if (LK.first == TargetLowering::TypeSplitVector) Cost *= 2; @@ -174,7 +174,7 @@ unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode, int ISD = InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair<unsigned, EVT> LT = + std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty)); if (!TLI->isOperationExpand(ISD, LT.second)) { @@ -205,10 +205,10 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, int ISD = InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair<unsigned, EVT> SrcLT = + std::pair<unsigned, MVT> SrcLT = getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); - std::pair<unsigned, EVT> DstLT = + std::pair<unsigned, MVT> DstLT = getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst)); // Handle scalar conversions. 
@@ -283,7 +283,7 @@ unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode, ISD = ISD::VSELECT; } - std::pair<unsigned, EVT> LT = + std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy)); if (!TLI->isOperationExpand(ISD, LT.second)) { @@ -326,7 +326,7 @@ unsigned VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - std::pair<unsigned, EVT> LT = + std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); // Assume that all loads of legal types cost 1. @@ -335,7 +335,7 @@ VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, EVT> LT = + std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp)); return LT.first; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0d38ba236e6..575d30df2e0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17505,63 +17505,51 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return Res; } +//===----------------------------------------------------------------------===// +// +// X86 cost model. +// +//===----------------------------------------------------------------------===// + +struct X86CostTblEntry { + int ISD; + MVT Type; + unsigned Cost; +}; + unsigned X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = + getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty)); + + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>(); - // Fix some of the inaccuracies of the target independent estimation. 
- if (Ty->isVectorTy() && ST.hasSSE41()) { - unsigned NumElem = Ty->getVectorNumElements(); - unsigned SizeInBits = Ty->getScalarType()->getScalarSizeInBits(); - - bool Is2 = (NumElem == 2); - bool Is4 = (NumElem == 4); - bool Is8 = (NumElem == 8); - bool Is32bits = (SizeInBits == 32); - bool Is64bits = (SizeInBits == 64); - bool HasAvx = ST.hasAVX(); - bool HasAvx2 = ST.hasAVX2(); - - switch (Opcode) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: { - // Only AVX2 has support for 8-wide integer operations. - if (Is32bits && (Is4 || (Is8 && HasAvx2))) return 1; - if (Is64bits && (Is2 || (Is4 && HasAvx2))) return 1; - - // We don't have to completly scalarize unsupported ops. We can - // issue two half-sized operations (with some overhead). - // We don't need to extract the lower part of the YMM to the XMM. - // Extract the upper, two ops, insert the upper = 4. - if (Is32bits && Is8 && HasAvx) return 4; - if (Is64bits && Is4 && HasAvx) return 4; - break; - } - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: { - // AVX has support for 8-wide float operations. - if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1; - if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1; - break; - } - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // AVX has support for 8-wide integer bitwise operations. - if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1; - if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1; - break; - } + static const X86CostTblEntry AVX1CostTable[] = { + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. 
+ { ISD::MUL, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v8i32, 4 }, + { ISD::ADD, MVT::v8i32, 4 }, + { ISD::MUL, MVT::v4i64, 4 }, + { ISD::SUB, MVT::v4i64, 4 }, + { ISD::ADD, MVT::v4i64, 4 }, + }; + + // Look for AVX1 lowering tricks. + if (ST.hasAVX()) + for (unsigned int i = 0, e = array_lengthof(AVX1CostTable); i < e; ++i) { + if (AVX1CostTable[i].ISD == ISD && AVX1CostTable[i].Type == LT.second) + return LT.first * AVX1CostTable[i].Cost; } - } + // Fallback to the default implementation. return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty); } diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll index 58b4a7c4265..37cca8d5406 100644 --- a/test/Analysis/CostModel/X86/arith.ll +++ b/test/Analysis/CostModel/X86/arith.ll @@ -12,6 +12,8 @@ define i32 @add(i32 %arg) { %C = add <2 x i64> undef, undef ;CHECK: cost of 4 {{.*}} add %D = add <4 x i64> undef, undef + ;CHECK: cost of 8 {{.*}} add + %E = add <8 x i64> undef, undef ;CHECK: cost of 1 {{.*}} ret ret i32 undef } diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 8582613617a..19bcdc5d902 100644 --- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -25,7 +25,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun } ;CHECK: @conversion_cost2 -;CHECK: store <8 x float> +;CHECK-NOT: <8 x float> ;CHECK: ret define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 9 -- 2.34.1