Implement the cost of abnormal x86 instruction lowering as a table.

author Nadav Rotem <nrotem@apple.com>

Mon, 5 Nov 2012 19:32:46 +0000 (19:32 +0000)

committer Nadav Rotem <nrotem@apple.com>

Mon, 5 Nov 2012 19:32:46 +0000 (19:32 +0000)
author Nadav Rotem <nrotem@apple.com>
Mon, 5 Nov 2012 19:32:46 +0000 (19:32 +0000)
committer Nadav Rotem <nrotem@apple.com>
Mon, 5 Nov 2012 19:32:46 +0000 (19:32 +0000)
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h

index 625be7208ad6d5e5c9794385a8e57e47405fad5b..d5ab3728afd70cfa1c786b324de4b34dd2a72af0 100644 (file)
--- a/include/llvm/Target/TargetTransformImpl.h
+++ b/include/llvm/Target/TargetTransformImpl.h
@@ -55,13 +55,16 @@ protected:
    const TargetLowering *TLI;
  
    /// Estimate the cost of type-legalization and the legalized type.
-  std::pair<unsigned, EVT>
+  std::pair<unsigned, MVT>
    getTypeLegalizationCost(LLVMContext &C, EVT Ty) const;
  
    /// Estimate the overhead of scalarizing an instruction. Insert and Extract
    /// are set if the result needs to be inserted and/or extracted from vectors.
    unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
  
+  // Get the ISD node that corresponds to the Instruction class opcode.
+  int InstructionOpcodeToISD(unsigned Opcode) const;
+
  public:
    explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
  
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp

index 4cd07cd6c373ef82e86cac3bfd89d83abf4e73d0..a9f02edaae4c513360054c9c606998a557427741 100644 (file)
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -60,7 +60,7 @@ bool ScalarTargetTransformImpl::shouldBuildLookupTables() const {
  // Calls used by the vectorizers.
  //
  //===----------------------------------------------------------------------===//
-static int InstructionOpcodeToISD(unsigned Opcode) {
+int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
    enum InstructionOpcodes {
  #define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
  #define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
@@ -130,7 +130,7 @@ static int InstructionOpcodeToISD(unsigned Opcode) {
    llvm_unreachable("Unknown instruction type encountered!");
  }
  
-std::pair<unsigned, EVT>
+std::pair<unsigned, MVT>
  VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
                                                     EVT Ty) const {
    unsigned Cost = 1;
@@ -141,7 +141,7 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
      TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
  
      if (LK.first == TargetLowering::TypeLegal)
-      return std::make_pair(Cost, Ty);
+      return std::make_pair(Cost, Ty.getSimpleVT());
  
      if (LK.first == TargetLowering::TypeSplitVector)
        Cost *= 2;
@@ -174,7 +174,7 @@ unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
    int ISD = InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
  
-  std::pair<unsigned, EVT> LT =
+  std::pair<unsigned, MVT> LT =
    getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty));
  
    if (!TLI->isOperationExpand(ISD, LT.second)) {
@@ -205,10 +205,10 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
    int ISD = InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
  
-  std::pair<unsigned, EVT> SrcLT =
+  std::pair<unsigned, MVT> SrcLT =
    getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
  
-  std::pair<unsigned, EVT> DstLT =
+  std::pair<unsigned, MVT> DstLT =
    getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst));
  
    // Handle scalar conversions.
@@ -283,7 +283,7 @@ unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
        ISD = ISD::VSELECT;
    }
  
-  std::pair<unsigned, EVT> LT =
+  std::pair<unsigned, MVT> LT =
    getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy));
  
    if (!TLI->isOperationExpand(ISD, LT.second)) {
@@ -326,7 +326,7 @@ unsigned
  VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                             unsigned Alignment,
                                             unsigned AddressSpace) const {
-  std::pair<unsigned, EVT> LT =
+  std::pair<unsigned, MVT> LT =
    getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
  
    // Assume that all loads of legal types cost 1.
@@ -335,7 +335,7 @@ VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
  
  unsigned
  VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
-  std::pair<unsigned, EVT> LT =
+  std::pair<unsigned, MVT> LT =
      getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp));
    return LT.first;
  }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 0d38ba236e6cc38f6879cb07747272c8865c3266..575d30df2e0be6ff7a2540f5b217f8267f032c18 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17505,63 +17505,51 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
    return Res;
  }
  
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+struct X86CostTblEntry {  // One row of an X86 cost table: an (ISD opcode, legalized type) key and its cost.
+  int ISD;        // ISD opcode; compared against the result of InstructionOpcodeToISD(Opcode).
+  MVT Type;       // Simple value type; compared against the legalized type (LT.second).
+  unsigned Cost;  // Cost of one operation on Type; the lookup multiplies it by LT.first.
+};
+
  unsigned
  X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
                                                       Type *Ty) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT =
+  getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty));
+
+  int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
    const X86Subtarget &ST =
    TLI->getTargetMachine().getSubtarget<X86Subtarget>();
  
-  // Fix some of the inaccuracies of the target independent estimation.
-  if (Ty->isVectorTy() && ST.hasSSE41()) {
-    unsigned NumElem = Ty->getVectorNumElements();
-    unsigned SizeInBits = Ty->getScalarType()->getScalarSizeInBits();
-
-    bool Is2 = (NumElem == 2);
-    bool Is4 = (NumElem == 4);
-    bool Is8 = (NumElem == 8);
-    bool Is32bits = (SizeInBits == 32);
-    bool Is64bits = (SizeInBits == 64);
-    bool HasAvx = ST.hasAVX();
-    bool HasAvx2 = ST.hasAVX2();
-
-    switch (Opcode) {
-      case Instruction::Add:
-      case Instruction::Sub:
-      case Instruction::Mul: {
-        // Only AVX2 has support for 8-wide integer operations.
-        if (Is32bits && (Is4 || (Is8 && HasAvx2))) return 1;
-        if (Is64bits && (Is2 || (Is4 && HasAvx2))) return 1;
-
-        // We don't have to completly scalarize unsupported ops. We can
-        // issue two half-sized operations (with some overhead).
-        // We don't need to extract the lower part of the YMM to the XMM.
-        // Extract the upper, two ops, insert the upper = 4.
-        if (Is32bits && Is8 && HasAvx) return 4;
-        if (Is64bits && Is4 && HasAvx) return 4;
-        break;
-      }
-      case Instruction::FAdd:
-      case Instruction::FSub:
-      case Instruction::FMul: {
-        // AVX has support for 8-wide float operations.
-        if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1;
-        if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1;
-        break;
-      }
-      case Instruction::Shl:
-      case Instruction::LShr:
-      case Instruction::AShr:
-      case Instruction::And:
-      case Instruction::Or:
-      case Instruction::Xor: {
-        // AVX has support for 8-wide integer bitwise operations.
-        if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1;
-        if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1;
-        break;
-      }
+  static const X86CostTblEntry AVX1CostTable[] = {
+    // We don't have to scalarize unsupported ops. We can issue two half-sized
+    // operations and we only need to extract the upper YMM half.
+    // Two ops + 1 extract + 1 insert = 4.
+    { ISD::MUL,     MVT::v8i32,    4 },
+    { ISD::SUB,     MVT::v8i32,    4 },
+    { ISD::ADD,     MVT::v8i32,    4 },
+    { ISD::MUL,     MVT::v4i64,    4 },
+    { ISD::SUB,     MVT::v4i64,    4 },
+    { ISD::ADD,     MVT::v4i64,    4 },
+    };
+
+  // Look for AVX1 lowering tricks.
+  if (ST.hasAVX())
+    for (unsigned int i = 0, e = array_lengthof(AVX1CostTable); i < e; ++i) {
+      if (AVX1CostTable[i].ISD == ISD && AVX1CostTable[i].Type == LT.second)
+        return LT.first * AVX1CostTable[i].Cost;
      }
-  }
  
+  // Fall back to the default implementation.
    return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
  }
  
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll

index 58b4a7c42653aee3c24609ab0bc8b758d4b521e3..37cca8d5406705f44c55a5dab95436d483823ae4 100644 (file)
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -12,6 +12,8 @@ define i32 @add(i32 %arg) {
    %C = add <2 x i64> undef, undef
    ;CHECK: cost of 4 {{.*}} add
    %D = add <4 x i64> undef, undef
+  ;CHECK: cost of 8 {{.*}} add
+  %E = add <8 x i64> undef, undef
    ;CHECK: cost of 1 {{.*}} ret
    ret i32 undef
  }
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll

index 8582613617a2ca2fdd2d7901729bf6342e7690fb..19bcdc5d902741233a9f7f8f9fd8c0e5e5a390fe 100644 (file)
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -25,7 +25,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun
  }
  
  ;CHECK: @conversion_cost2
-;CHECK: store <8 x float>
+;CHECK-NOT: <8 x float>
  ;CHECK: ret
  define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
    %1 = icmp sgt i32 %n, 9
author	Nadav Rotem <nrotem@apple.com>
	Mon, 5 Nov 2012 19:32:46 +0000 (19:32 +0000)
committer	Nadav Rotem <nrotem@apple.com>
	Mon, 5 Nov 2012 19:32:46 +0000 (19:32 +0000)
include/llvm/Target/TargetTransformImpl.h		patch \| blob \| history
lib/Target/TargetTransformImpl.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/Analysis/CostModel/X86/arith.ll		patch \| blob \| history
test/Transforms/LoopVectorize/X86/conversion-cost.ll		patch \| blob \| history