From 7ae3bcca4502f79fdadbfbbb0e68c5e14cc699fa Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Mon, 5 Nov 2012 23:48:20 +0000
Subject: [PATCH] CostModel: Add tables for the common x86 compares.

The compare cost tables are consulted from the most capable ISA level
(AVX2) down to SSE4.2, so that the AVX2 entries for 256-bit integer
compares are not shadowed by the AVX1 entries for the same vector types.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167421 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 89 +++++++++++++++++++++++++++---
 lib/Target/X86/X86ISelLowering.h   |  4 +-
 test/Analysis/CostModel/X86/cmp.ll | 42 +++++++++++++++
 3 files changed, 127 insertions(+), 8 deletions(-)
 create mode 100644 test/Analysis/CostModel/X86/cmp.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index afc71db0057..0918f954a4b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17517,6 +17517,15 @@ struct X86CostTblEntry {
   unsigned Cost;
 };
 
+int FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
+  for (unsigned int i = 0; i < len; ++i)
+    if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
+      return i;
+
+  // Could not find an entry.
+  return -1;
+}
+
 unsigned
 X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
                                                      Type *Ty) const {
@@ -17543,12 +17552,12 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
   };
 
   // Look for AVX1 lowering tricks.
-  if (ST.hasAVX())
-    for (unsigned int i = 0, e = array_lengthof(AVX1CostTable); i < e; ++i) {
-      if (AVX1CostTable[i].ISD == ISD && AVX1CostTable[i].Type == LT.second)
-        return LT.first * AVX1CostTable[i].Cost;
-    }
-
+  if (ST.hasAVX()) {
+    int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+                          LT.second);
+    if (Idx != -1)
+      return LT.first * AVX1CostTable[Idx].Cost;
+  }
   // Fallback to the default implementation.
   return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
 }
@@ -17558,7 +17567,7 @@ X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                  unsigned Index) const {
   assert(Val->isVectorTy() && "This must be a vector type");
 
-  if (Index != -1u) {
+  if (Index != -1U) {
     // Legalize the type.
     std::pair<unsigned, MVT> LT =
         getTypeLegalizationCost(Val->getContext(), TLI->getValueType(Val));
@@ -17579,3 +17588,69 @@ X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
   return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
 }
 
+unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
+                                                          Type *ValTy,
+                                                          Type *CondTy) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT =
+      getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy));
+
+  MVT MTy = LT.second;
+
+  int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  const X86Subtarget &ST =
+      TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+  static const X86CostTblEntry SSE42CostTbl[] = {
+    { ISD::SETCC,   MVT::v2f64,   1 },
+    { ISD::SETCC,   MVT::v4f32,   1 },
+    { ISD::SETCC,   MVT::v2i64,   1 },
+    { ISD::SETCC,   MVT::v4i32,   1 },
+    { ISD::SETCC,   MVT::v8i16,   1 },
+    { ISD::SETCC,   MVT::v16i8,   1 },
+  };
+
+  static const X86CostTblEntry AVX1CostTbl[] = {
+    { ISD::SETCC,   MVT::v4f64,   1 },
+    { ISD::SETCC,   MVT::v8f32,   1 },
+    // AVX1 has no 256-bit integer compares.
+    { ISD::SETCC,   MVT::v4i64,   4 },
+    { ISD::SETCC,   MVT::v8i32,   4 },
+    { ISD::SETCC,   MVT::v16i16,  4 },
+    { ISD::SETCC,   MVT::v32i8,   4 },
+  };
+
+  static const X86CostTblEntry AVX2CostTbl[] = {
+    { ISD::SETCC,   MVT::v4i64,   1 },
+    { ISD::SETCC,   MVT::v8i32,   1 },
+    { ISD::SETCC,   MVT::v16i16,  1 },
+    { ISD::SETCC,   MVT::v32i8,   1 },
+  };
+
+  // Query the most capable ISA level first: an AVX2 subtarget also reports
+  // hasAVX(), so the AVX1 entries would otherwise shadow the AVX2 ones.
+  if (ST.hasAVX2()) {
+    int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX2CostTbl[Idx].Cost;
+  }
+
+  if (ST.hasAVX()) {
+    int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX1CostTbl[Idx].Cost;
+  }
+
+  if (ST.hasSSE42()) {
+    int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * SSE42CostTbl[Idx].Cost;
+  }
+
+  return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+
+
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 3ecef983bd3..40dfaa0d434 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -957,8 +957,10 @@ namespace llvm {
     virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                                         unsigned Index) const;
 
-  };
+    unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                Type *CondTy) const;
 
+  };
 }
 
 #endif  // X86ISELLOWERING_H
diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll
new file mode 100644
index 00000000000..f868bd18b54
--- /dev/null
+++ b/test/Analysis/CostModel/X86/cmp.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @cmp(i32 %arg) {
+  ;  -- floats --
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %A = fcmp olt <2 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %B = fcmp olt <4 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %C = fcmp olt <8 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %D = fcmp olt <2 x double> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %E = fcmp olt <4 x double> undef, undef
+
+  ;  -- integers --
+
+  ;CHECK: cost of 1 {{.*}} icmp
+  %F = icmp eq <16 x i8> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %G = icmp eq <8 x i16> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %H = icmp eq <4 x i32> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %I = icmp eq <2 x i64> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %J = icmp eq <4 x i64> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %K = icmp eq <8 x i32> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %L = icmp eq <16 x i16> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %M = icmp eq <32 x i8> undef, undef
+
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
+
-- 
2.34.1