TargetTransformInfo: address calculation parameter for gather/scather

author Arnold Schwaighofer <aschwaighofer@apple.com>

Fri, 12 Jul 2013 19:16:02 +0000 (19:16 +0000)

committer Arnold Schwaighofer <aschwaighofer@apple.com>

Fri, 12 Jul 2013 19:16:02 +0000 (19:16 +0000)
author Arnold Schwaighofer <aschwaighofer@apple.com>
Fri, 12 Jul 2013 19:16:02 +0000 (19:16 +0000)
committer Arnold Schwaighofer <aschwaighofer@apple.com>
Fri, 12 Jul 2013 19:16:02 +0000 (19:16 +0000)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h

index eb29e3483d831077cc56e1c4e650ee853df12b70..b8a44b5b665e3c9ff7dc0e60366ea46a802a8639 100644 (file)
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -339,7 +339,11 @@ public:
    /// merged into the instruction indexing mode. Some targets might want to
    /// distinguish between address computation for memory operations on vector
    /// types and scalar types. Such targets should override this function.
-  virtual unsigned getAddressComputationCost(Type *Ty) const;
+  /// The 'IsComplex' parameter is a hint that the address computation is likely
+  /// to involve multiple instructions and as such unlikely to be merged into
+  /// the address indexing mode.
+  virtual unsigned getAddressComputationCost(Type *Ty,
+                                             bool IsComplex = false) const;
  
    /// @}
  
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp

index 35ce794c7f12099c310e7c5614ef4baeabd331b9..4b2bf3c770bb34d988a9c788b7b7df298da9a90d 100644 (file)
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -206,8 +206,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
    return PrevTTI->getNumberOfParts(Tp);
  }
  
-unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const {
-  return PrevTTI->getAddressComputationCost(Tp);
+unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
+                                                        bool IsComplex) const {
+  return PrevTTI->getAddressComputationCost(Tp, IsComplex);
  }
  
  namespace {
@@ -559,7 +560,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
      return 0;
    }
  
-  unsigned getAddressComputationCost(Type *Tp) const {
+  unsigned getAddressComputationCost(Type *Tp, bool) const {
      return 0;
    }
  };
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp

index 19ace642ef10504031c163a57e369f781b9515e3..3755320ab15c675c1bf3c5fe2872a2c711e956df 100644 (file)
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -108,7 +108,7 @@ public:
    virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
                                           ArrayRef<Type*> Tys) const;
    virtual unsigned getNumberOfParts(Type *Tp) const;
-  virtual unsigned getAddressComputationCost(Type *Ty) const;
+  virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const;
  
    /// @}
  };
@@ -489,6 +489,6 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
    return LT.first;
  }
  
-unsigned BasicTTI::getAddressComputationCost(Type *Ty) const {
+unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
    return 0;
  }
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp

index 53ece668c3f5294c6d47fcd12861078aa129316b..79f56a4b68cc544036bbb8120d7972f4428b228f 100644 (file)
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -124,7 +124,7 @@ public:
  
    unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
  
-  unsigned getAddressComputationCost(Type *Val) const;
+  unsigned getAddressComputationCost(Type *Val, bool IsComplex) const;
  
    unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                    OperandValueKind Op1Info = OK_AnyValue,
@@ -425,7 +425,7 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
  }
  
-unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
    // In many cases the address computation is not merged into the instruction
    // addressing mode.
    return 1;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 020eb615714ca8eef06deb957b3c4a62ca954121..b35ed74497c7f54535b57e6a23d97bc82e692f4d 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4403,6 +4403,59 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
    return Cost;
  }
  
+/// \brief Check whether the address computation for a non-consecutive memory
+/// access looks like an unlikely candidate for being merged into the indexing
+/// mode.
+///
+/// We look for a GEP which has one index that is an induction variable and all
+/// other indices are loop invariant. If the stride of this access is also
+/// within a small bound we decide that this address computation can likely be
+/// merged into the addressing mode.
+/// In all other cases, we identify the address computation as complex.
+static bool isLikelyComplexAddressComputation(Value *Ptr,
+                                              LoopVectorizationLegality *Legal,
+                                              ScalarEvolution *SE,
+                                              const Loop *TheLoop) {
+  GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!Gep)
+    return true;
+
+  // We are looking for a gep with all loop invariant indices except for one
+  // which should be an induction variable.
+  unsigned NumOperands = Gep->getNumOperands();
+  for (unsigned i = 1; i < NumOperands; ++i) {
+    Value *Opd = Gep->getOperand(i);
+    if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
+        !Legal->isInductionVariable(Opd))
+      return true;
+  }
+
+  // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
+  // can likely be merged into the address computation.
+  unsigned MaxMergeDistance = 64;
+
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
+  if (!AddRec)
+    return true;
+
+  // Check the step is constant.
+  const SCEV *Step = AddRec->getStepRecurrence(*SE);
+  // Calculate the pointer stride and check if it is consecutive.
+  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+  if (!C)
+    return true;
+
+  const APInt &APStepVal = C->getValue()->getValue();
+
+  // Huge step value - give up.
+  if (APStepVal.getBitWidth() > 64)
+    return true;
+
+  int64_t StepVal = APStepVal.getSExtValue();
+
+  return StepVal > MaxMergeDistance;
+}
+
  unsigned
  LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
    // If we know that this instruction will remain uniform, check the cost of
@@ -4498,6 +4551,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
      unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
      unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
      if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
+      bool IsComplexComputation =
+        isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
        unsigned Cost = 0;
        // The cost of extracting from the value vector and pointer vector.
        Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -4513,7 +4568,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
        }
  
        // The cost of the scalar loads/stores.
-      Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
+      Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
        Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                         Alignment, AS);
        return Cost;
author	Arnold Schwaighofer <aschwaighofer@apple.com>
	Fri, 12 Jul 2013 19:16:02 +0000 (19:16 +0000)
committer	Arnold Schwaighofer <aschwaighofer@apple.com>
	Fri, 12 Jul 2013 19:16:02 +0000 (19:16 +0000)
include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
lib/CodeGen/BasicTargetTransformInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMTargetTransformInfo.cpp		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history