X86TTI: i16/i32 vector divisions by a constant (splat) divisor are reasonably cheap now.

author Benjamin Kramer <benny.kra@googlemail.com>

Sat, 26 Apr 2014 14:53:05 +0000 (14:53 +0000)

committer Benjamin Kramer <benny.kra@googlemail.com>

Sat, 26 Apr 2014 14:53:05 +0000 (14:53 +0000)
author Benjamin Kramer <benny.kra@googlemail.com>
Sat, 26 Apr 2014 14:53:05 +0000 (14:53 +0000)
committer Benjamin Kramer <benny.kra@googlemail.com>
Sat, 26 Apr 2014 14:53:05 +0000 (14:53 +0000)
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp

index 5158004cadb262bd15e6ad8a95a8735078a1693c..628e9130a151e69d308dcb4d8e58f1a239b72ee3 100644 (file)
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -284,6 +284,21 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
  
+  static const CostTblEntry<MVT::SimpleValueType>
+  AVX2UniformConstCostTable[] = {
+    { ISD::SDIV, MVT::v16i16,  6 }, // vpmulhw sequence
+    { ISD::UDIV, MVT::v16i16,  6 }, // vpmulhuw sequence
+    { ISD::SDIV, MVT::v8i32,  15 }, // vpmuldq sequence
+    { ISD::UDIV, MVT::v8i32,  15 }, // vpmuludq sequence
+  };
+
+  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+      ST->hasAVX2()) {
+    int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * AVX2UniformConstCostTable[Idx].Cost;
+  }
+
    static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
      // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
      // customize them to detect the cases where shift amount is a scalar one.
@@ -351,6 +366,10 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
      { ISD::SRA,  MVT::v16i8,  4 }, // psrlw, pand, pxor, psubb.
      { ISD::SRA,  MVT::v8i16,  1 }, // psraw.
      { ISD::SRA,  MVT::v4i32,  1 }, // psrad.
+
+    { ISD::SDIV, MVT::v8i16,  6 }, // pmulhw sequence
+    { ISD::UDIV, MVT::v8i16,  6 }, // pmulhuw sequence
+    { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
    };
  
    if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
diff --git a/test/Analysis/CostModel/X86/vdiv-cost.ll b/test/Analysis/CostModel/X86/vdiv-cost.ll

new file mode 100644 (file)

index 0000000..4ba1ef0
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vdiv-cost.ll
@@ -0,0 +1,92 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+define <4 x i32> @test1(<4 x i32> %a) {
+  %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test1':
+; SSE2: Found an estimated cost of 15 for instruction:   %div
+; AVX2: Found an estimated cost of 15 for instruction:   %div
+}
+
+define <8 x i32> @test2(<8 x i32> %a) {
+  %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+  ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test2':
+; SSE2: Found an estimated cost of 30 for instruction:   %div
+; AVX2: Found an estimated cost of 15 for instruction:   %div
+}
+
+define <8 x i16> @test3(<8 x i16> %a) {
+  %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test3':
+; SSE2: Found an estimated cost of 6 for instruction:   %div
+; AVX2: Found an estimated cost of 6 for instruction:   %div
+}
+
+define <16 x i16> @test4(<16 x i16> %a) {
+  %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
+  ret <16 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test4':
+; SSE2: Found an estimated cost of 12 for instruction:   %div
+; AVX2: Found an estimated cost of 6 for instruction:   %div
+}
+
+define <8 x i16> @test5(<8 x i16> %a) {
+  %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test5':
+; SSE2: Found an estimated cost of 6 for instruction:   %div
+; AVX2: Found an estimated cost of 6 for instruction:   %div
+}
+
+define <16 x i16> @test6(<16 x i16> %a) {
+  %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
+  ret <16 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test6':
+; SSE2: Found an estimated cost of 12 for instruction:   %div
+; AVX2: Found an estimated cost of 6 for instruction:   %div
+}
+
+define <16 x i8> @test7(<16 x i8> %a) {
+  %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test7':
+; SSE2: Found an estimated cost of 320 for instruction:   %div
+; AVX2: Found an estimated cost of 320 for instruction:   %div
+}
+
+define <4 x i32> @test8(<4 x i32> %a) {
+  %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test8':
+; SSE2: Found an estimated cost of 15 for instruction:   %div
+; AVX2: Found an estimated cost of 15 for instruction:   %div
+}
+
+define <8 x i32> @test9(<8 x i32> %a) {
+  %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+  ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test9':
+; SSE2: Found an estimated cost of 30 for instruction:   %div
+; AVX2: Found an estimated cost of 15 for instruction:   %div
+}
+
+define <8 x i32> @test10(<8 x i32> %a) {
+  %div = sdiv <8 x i32> %a, <i32 8, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+  ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test10':
+; SSE2: Found an estimated cost of 160 for instruction:   %div
+; AVX2: Found an estimated cost of 160 for instruction:   %div
+}
author	Benjamin Kramer <benny.kra@googlemail.com>
	Sat, 26 Apr 2014 14:53:05 +0000 (14:53 +0000)
committer	Benjamin Kramer <benny.kra@googlemail.com>
	Sat, 26 Apr 2014 14:53:05 +0000 (14:53 +0000)
lib/Target/X86/X86TargetTransformInfo.cpp		patch | blob | history
test/Analysis/CostModel/X86/vdiv-cost.ll	[new file with mode: 0644]	patch | blob