Revert r240137 (Fixed/added namespace ending comments using clang-tidy. NFC)

[oota-llvm.git] / include / llvm / CodeGen / BasicTTIImpl.h
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h

index d7129579cf014f7e9a23d2beaba6efb388f3df16..3e464f4f1e5a977927fda3f8993c003f1b6055cb 100644 (file)
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -21,6 +21,7 @@
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Target/TargetLowering.h"
  #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
  
  namespace llvm {
  
@@ -113,6 +114,8 @@ public:
  
    bool hasBranchDivergence() { return false; }
  
+  bool isSourceOfDivergence(const Value *V) { return false; }
+
    bool isLegalAddImmediate(int64_t imm) {
      return getTLI()->isLegalAddImmediate(imm);
    }
@@ -122,34 +125,61 @@ public:
    }
  
    bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
-                             bool HasBaseReg, int64_t Scale) {
+                             bool HasBaseReg, int64_t Scale,
+                             unsigned AddrSpace) {
      TargetLoweringBase::AddrMode AM;
      AM.BaseGV = BaseGV;
      AM.BaseOffs = BaseOffset;
      AM.HasBaseReg = HasBaseReg;
      AM.Scale = Scale;
-    return getTLI()->isLegalAddressingMode(AM, Ty);
+    return getTLI()->isLegalAddressingMode(AM, Ty, AddrSpace);
    }
  
    int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
-                           bool HasBaseReg, int64_t Scale) {
+                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
      TargetLoweringBase::AddrMode AM;
      AM.BaseGV = BaseGV;
      AM.BaseOffs = BaseOffset;
      AM.HasBaseReg = HasBaseReg;
      AM.Scale = Scale;
-    return getTLI()->getScalingFactorCost(AM, Ty);
+    return getTLI()->getScalingFactorCost(AM, Ty, AddrSpace);
    }
  
    bool isTruncateFree(Type *Ty1, Type *Ty2) {
      return getTLI()->isTruncateFree(Ty1, Ty2);
    }
  
+  bool isProfitableToHoist(Instruction *I) {
+    return getTLI()->isProfitableToHoist(I);
+  }
+
    bool isTypeLegal(Type *Ty) {
      EVT VT = getTLI()->getValueType(Ty);
      return getTLI()->isTypeLegal(VT);
    }
  
+  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<const Value *> Arguments) {
+    return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
+  }
+
+  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<Type *> ParamTys) {
+    if (IID == Intrinsic::cttz) {
+      if (getTLI()->isCheapToSpeculateCttz())
+        return TargetTransformInfo::TCC_Basic;
+      return TargetTransformInfo::TCC_Expensive;
+    }
+
+    if (IID == Intrinsic::ctlz) {
+       if (getTLI()->isCheapToSpeculateCtlz())
+        return TargetTransformInfo::TCC_Basic;
+      return TargetTransformInfo::TCC_Expensive;
+    }
+
+    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
+  }
+
    unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
  
    unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
@@ -167,8 +197,32 @@ public:
             TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
    }
  
-  void getUnrollingPreferences(const Function *F, Loop *L,
-                               TTI::UnrollingPreferences &UP) {
+  unsigned getFPOpCost(Type *Ty) {
+    // By default, assume FP instructions have basic cost since they are
+    // implemented in HW. A target-specific TTI can override this.
+    return TargetTransformInfo::TCC_Basic;
+  }
+
+  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
+    const TargetLoweringBase *TLI = getTLI();
+    switch (Opcode) {
+    default: break;
+    case Instruction::Trunc: {
+      if (TLI->isTruncateFree(OpTy, Ty))
+        return TargetTransformInfo::TCC_Free;
+      return TargetTransformInfo::TCC_Basic;
+    }
+    case Instruction::ZExt: {
+      if (TLI->isZExtFree(OpTy, Ty))
+        return TargetTransformInfo::TCC_Free;
+      return TargetTransformInfo::TCC_Basic;
+    }
+    }
+
+    return BaseT::getOperationCost(Opcode, Ty, OpTy);
+  }
+
+  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
      // This unrolling functionality is target independent, but to provide some
      // motivation for its intended use, for x86:
  
@@ -232,7 +286,7 @@ public:
  
    unsigned getRegisterBitWidth(bool Vector) { return 32; }
  
-  unsigned getMaxInterleaveFactor() { return 1; }
+  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
  
    unsigned getArithmeticInstrCost(
        unsigned Opcode, Type *Ty,
@@ -469,6 +523,73 @@ public:
      return Cost;
    }
  
+  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                      unsigned Factor,
+                                      ArrayRef<unsigned> Indices,
+                                      unsigned Alignment,
+                                      unsigned AddressSpace) {
+    VectorType *VT = dyn_cast<VectorType>(VecTy);
+    assert(VT && "Expect a vector type for interleaved memory op");
+
+    unsigned NumElts = VT->getNumElements();
+    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
+
+    unsigned NumSubElts = NumElts / Factor;
+    VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
+
+    // First, the cost of the load/store operation itself.
+    unsigned Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+    // Then add the cost of the interleave operation.
+    if (Opcode == Instruction::Load) {
+      // The interleave cost is modeled as extracting each sub vector's
+      // elements from the wide vector and inserting them into that sub vector.
+      //
+      // E.g. An interleaved load of factor 2 (with one member of index 0):
+      //      %vec = load <8 x i32>, <8 x i32>* %ptr
+      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
+      // The cost is estimated as extracting the elements at 0, 2, 4, 6 from
+      // the <8 x i32> vector and inserting them into a <4 x i32> vector.
+
+      assert(Indices.size() <= Factor &&
+             "Interleaved memory op has too many members");
+      for (unsigned Index : Indices) {
+        assert(Index < Factor && "Invalid index for interleaved memory op");
+
+        // Extract elements from loaded vector for each sub vector.
+        for (unsigned i = 0; i < NumSubElts; i++)
+          Cost += getVectorInstrCost(Instruction::ExtractElement, VT,
+                                     Index + i * Factor);
+      }
+
+      unsigned InsSubCost = 0;
+      for (unsigned i = 0; i < NumSubElts; i++)
+        InsSubCost += getVectorInstrCost(Instruction::InsertElement, SubVT, i);
+
+      Cost += Indices.size() * InsSubCost;
+    } else {
+      // The interleave cost is modeled as extracting all elements from the
+      // sub vectors and inserting them into the wide vector.
+      //
+      // E.g. An interleaved store of factor 2:
+      //      %interleaved.vec = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
+      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
+      // The cost is estimated as extracting all elements from both <4 x i32>
+      // vectors and inserting them into the <8 x i32> vector.
+
+      unsigned ExtSubCost = 0;
+      for (unsigned i = 0; i < NumSubElts; i++)
+        ExtSubCost += getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
+
+      Cost += Factor * ExtSubCost;
+
+      for (unsigned i = 0; i < NumElts; i++)
+        Cost += getVectorInstrCost(Instruction::InsertElement, VT, i);
+    }
+
+    return Cost;
+  }
+
    unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                   ArrayRef<Type *> Tys) {
      unsigned ISD = 0;
@@ -477,18 +598,29 @@ public:
        // Assume that we need to scalarize this intrinsic.
        unsigned ScalarizationCost = 0;
        unsigned ScalarCalls = 1;
+      Type *ScalarRetTy = RetTy;
        if (RetTy->isVectorTy()) {
          ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
          ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+        ScalarRetTy = RetTy->getScalarType();
        }
+      SmallVector<Type *, 4> ScalarTys;
        for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
-        if (Tys[i]->isVectorTy()) {
-          ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
-          ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+        Type *Ty = Tys[i];
+        if (Ty->isVectorTy()) {
+          ScalarizationCost += getScalarizationOverhead(Ty, false, true);
+          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
+          Ty = Ty->getScalarType();
          }
+        ScalarTys.push_back(Ty);
        }
+      if (ScalarCalls == 1)
+        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
  
-      return ScalarCalls + ScalarizationCost;
+      unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
+          IID, ScalarRetTy, ScalarTys);
+
+      return ScalarCalls * ScalarCost + ScalarizationCost;
      }
      // Look for intrinsics that can be lowered directly or turned into a scalar
      // intrinsic call.
@@ -598,16 +730,46 @@ public:
      // this will emit a costly libcall, adding call overhead and spills. Make it
      // very expensive.
      if (RetTy->isVectorTy()) {
-      unsigned Num = RetTy->getVectorNumElements();
-      unsigned Cost = static_cast<T *>(this)->getIntrinsicInstrCost(
-          IID, RetTy->getScalarType(), Tys);
-      return 10 * Cost * Num;
+      unsigned ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+      unsigned ScalarCalls = RetTy->getVectorNumElements();
+      SmallVector<Type *, 4> ScalarTys;
+      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+        Type *Ty = Tys[i];
+        if (Ty->isVectorTy())
+          Ty = Ty->getScalarType();
+        ScalarTys.push_back(Ty);
+      }
+      unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
+          IID, RetTy->getScalarType(), ScalarTys);
+      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+        if (Tys[i]->isVectorTy()) {
+          ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
+        }
+      }
+
+      return ScalarCalls * ScalarCost + ScalarizationCost;
      }
  
      // This is going to be turned into a library call, make it expensive.
      return 10;
    }
  
+  /// \brief Compute the cost of a call instruction.
+  ///
+  /// Compute the cost of calling function F with return type RetTy and
+  /// argument types Tys. F might be nullptr; in that case the cost of an
+  /// arbitrary call with the specified signature is returned.
+  /// This is used, for instance, when estimating the cost of calling a
+  /// vector counterpart of the given function.
+  /// \param F Called function, might be nullptr.
+  /// \param RetTy Return value type.
+  /// \param Tys Argument types.
+  /// \returns The cost of the call instruction.
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
+    return 10;
+  }
+
    unsigned getNumberOfParts(Type *Tp) {
      std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp);
      return LT.first;