From 7ae3bcca4502f79fdadbfbbb0e68c5e14cc699fa Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Mon, 5 Nov 2012 23:48:20 +0000
Subject: [PATCH] CostModel: Add tables for the common x86 compares.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167421 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 87 +++++++++++++++++++++++++++---
 lib/Target/X86/X86ISelLowering.h   |  4 +-
 test/Analysis/CostModel/X86/cmp.ll | 42 +++++++++++++++
 3 files changed, 125 insertions(+), 8 deletions(-)
 create mode 100644 test/Analysis/CostModel/X86/cmp.ll
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index afc71db0057..0918f954a4b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17517,6 +17517,15 @@ struct X86CostTblEntry {
   unsigned Cost;
 };
 
+int FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
+  for (unsigned int i = 0; i < len; ++i)
+    if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
+      return i;
+
+  // Could not find an entry.
+  return -1;
+}
+
 unsigned
 X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
                                                      Type *Ty) const {
@@ -17543,12 +17552,12 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
     };
 
   // Look for AVX1 lowering tricks.
-  if (ST.hasAVX())
-    for (unsigned int i = 0, e = array_lengthof(AVX1CostTable); i < e; ++i) {
-      if (AVX1CostTable[i].ISD == ISD && AVX1CostTable[i].Type == LT.second)
-        return LT.first * AVX1CostTable[i].Cost;
-    }
-
+  if (ST.hasAVX()) {
+    int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+                          LT.second);
+    if (Idx != -1)
+      return LT.first * AVX1CostTable[Idx].Cost;
+  }
   // Fallback to the default implementation.
   return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
 }
@@ -17558,7 +17567,7 @@ X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                  unsigned Index) const {
   assert(Val->isVectorTy() && "This must be a vector type");
 
-  if (Index != -1u) {
+  if (Index != -1U) {
     // Legalize the type.
     std::pair<unsigned, MVT> LT =
     getTypeLegalizationCost(Val->getContext(), TLI->getValueType(Val));
@@ -17579,3 +17588,67 @@ X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
   return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
 }
 
+unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
+                                                          Type *ValTy,
+                                                          Type *CondTy) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT =
+  getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy));
+
+  MVT MTy = LT.second;
+
+  int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  const X86Subtarget &ST =
+  TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+  static const X86CostTblEntry SSE42CostTbl[] = {
+    { ISD::SETCC,   MVT::v2f64,   1 },
+    { ISD::SETCC,   MVT::v4f32,   1 },
+    { ISD::SETCC,   MVT::v2i64,   1 },
+    { ISD::SETCC,   MVT::v4i32,   1 },
+    { ISD::SETCC,   MVT::v8i16,   1 },
+    { ISD::SETCC,   MVT::v16i8,   1 },
+  };
+
+  static const X86CostTblEntry AVX1CostTbl[] = {
+    { ISD::SETCC,   MVT::v4f64,   1 },
+    { ISD::SETCC,   MVT::v8f32,   1 },
+    // AVX1 does not support 8-wide integer compare.
+    { ISD::SETCC,   MVT::v4i64,   4 },
+    { ISD::SETCC,   MVT::v8i32,   4 },
+    { ISD::SETCC,   MVT::v16i16,  4 },
+    { ISD::SETCC,   MVT::v32i8,   4 },
+  };
+
+  static const X86CostTblEntry AVX2CostTbl[] = {
+    { ISD::SETCC,   MVT::v4i64,   1 },
+    { ISD::SETCC,   MVT::v8i32,   1 },
+    { ISD::SETCC,   MVT::v16i16,  1 },
+    { ISD::SETCC,   MVT::v32i8,   1 },
+  };
+
+  if (ST.hasSSE42()) {
+    int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * SSE42CostTbl[Idx].Cost;
+  }
+
+  if (ST.hasAVX()) {
+    int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX1CostTbl[Idx].Cost;
+  }
+
+  if (ST.hasAVX2()) {
+    int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX2CostTbl[Idx].Cost;
+  }
+
+  return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+
+
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 3ecef983bd3..40dfaa0d434 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -957,8 +957,10 @@ namespace llvm {
 
     virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                                         unsigned Index) const;
-  };
 
+    unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                Type *CondTy) const;
+  };
 }
 
 #endif    // X86ISELLOWERING_H
diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll
new file mode 100644
index 00000000000..f868bd18b54
--- /dev/null
+++ b/test/Analysis/CostModel/X86/cmp.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @cmp(i32 %arg) {
+  ;  -- floats --
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %A = fcmp olt <2 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %B = fcmp olt <4 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %C = fcmp olt <8 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %D = fcmp olt <2 x double> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %E = fcmp olt <4 x double> undef, undef
+
+  ;  -- integers --
+
+  ;CHECK: cost of 1 {{.*}} icmp
+  %F = icmp eq <16 x i8> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %G = icmp eq <8 x i16> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %H = icmp eq <4 x i32> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %I = icmp eq <2 x i64> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %J = icmp eq <4 x i64> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %K = icmp eq <8 x i32> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %L = icmp eq <16 x i16> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %M = icmp eq <32 x i8> undef, undef
+
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
+
-- 
2.34.1