SLPVectorizer: Add support for trees that don't start at binary operators, and add...

author Nadav Rotem <nrotem@apple.com>

Sun, 14 Apr 2013 05:15:53 +0000 (05:15 +0000)

committer Nadav Rotem <nrotem@apple.com>

Sun, 14 Apr 2013 05:15:53 +0000 (05:15 +0000)
author Nadav Rotem <nrotem@apple.com>
Sun, 14 Apr 2013 05:15:53 +0000 (05:15 +0000)
committer Nadav Rotem <nrotem@apple.com>
Sun, 14 Apr 2013 05:15:53 +0000 (05:15 +0000)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp

index 2f55a007f24eb006f5d378f2e5cb68e4dcc61fcf..d94b2b2a0e3095d9e410ed447c83581583c1091b 100644 (file)
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -85,14 +85,16 @@ struct SLPVectorizer : public BasicBlockPass {
      return true;
    }
  
-  bool tryToVectorizePair(BinaryOperator *A, BinaryOperator *B,  BoUpSLP &R) {
+  bool tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R) {
      if (!A || !B) return false;
      BoUpSLP::ValueList VL;
      VL.push_back(A);
      VL.push_back(B);
      int Cost = R.getTreeCost(VL);
-    DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << ".\n");
-    if (Cost >= -SLPCostThreshold) return false;
+    int ExtrCost = R.getScalarizationCost(VL);
+    DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+                  " Cost of extract:" << ExtrCost << ".\n");
+    if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
      DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
      R.vectorizeArith(VL);
      return true;
@@ -100,11 +102,12 @@ struct SLPVectorizer : public BasicBlockPass {
  
    bool tryToVectorizeCandidate(BinaryOperator *V,  BoUpSLP &R) {
      if (!V) return false;
-    BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
-    BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
      // Try to vectorize V.
-    if (tryToVectorizePair(A, B, R)) return true;
+    if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+      return true;
  
+    BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+    BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
      // Try to skip B.
      if (B && B->hasOneUse()) {
        BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp

index 4d075c505d29e0a49e8a9a57bd739bd5cfcf656e..584f3d97788cd4457ce2317cc7be5fe2f4320ea2 100644 (file)
--- a/lib/Transforms/Vectorize/VecUtils.cpp
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -173,6 +173,16 @@ bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
    return Changed;
  }
  
+int BoUpSLP::getScalarizationCost(ValueList &VL) {
+  Type *ScalarTy = VL[0]->getType();
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    ScalarTy = SI->getValueOperand()->getType();
+
+  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  return getScalarizationCost(VecTy);
+}
+
  int BoUpSLP::getScalarizationCost(Type *Ty) {
    int Cost = 0;
    for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h

index f865236ff88b441aa4f6a1e9df170e0381a23e45..edebcb3e275e5adce356dac734b214d11feb1f40 100644 (file)
--- a/lib/Transforms/Vectorize/VecUtils.h
+++ b/lib/Transforms/Vectorize/VecUtils.h
@@ -61,6 +61,11 @@ struct BoUpSLP  {
    /// A negative number means that this is profitable.
    int getTreeCost(ValueList &VL);
  
+  /// \returns the scalarization cost for this ValueList. Assuming that this
+  /// subtree gets vectorized, we may need to extract the values from the
+  /// roots. This method calculates the cost of extracting the values.
+  int getScalarizationCost(ValueList &VL);
+
    /// \brief Attempts to order and vectorize a sequence of stores. This
    /// function does a quadratic scan of the given stores.
    /// \returns true if the basic block was modified.
@@ -118,7 +123,7 @@ private:
    /// by multiple lanes, or by users outside the tree.
    /// NOTICE: The vectorization methods also use this set.
    ValueSet MustScalarize;
-  
+
    // Contains a list of values that are used outside the current tree. This
    // set must be reset between runs.
    ValueSet MultiUserVals;
diff --git a/test/Transforms/SLPVectorizer/X86/reduction2.ll b/test/Transforms/SLPVectorizer/X86/reduction2.ll

new file mode 100644 (file)

index 0000000..9b5d5f7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: load <2 x double>
+;CHECK: ret
+define double @foo(double* nocapture %D) #0 {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %i.02 = phi i32 [ 0, %0 ], [ %10, %1 ]
+  %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ]
+  %2 = shl nsw i32 %i.02, 1
+  %3 = getelementptr inbounds double* %D, i32 %2
+  %4 = load double* %3, align 4, !tbaa !0
+  %A4 = fmul double %4, %4
+  %5 = or i32 %2, 1
+  %6 = getelementptr inbounds double* %D, i32 %5
+  %7 = load double* %6, align 4, !tbaa !0
+  %A7 = fmul double %7, %7
+  %8 = fadd double %A4, %A7
+  %9 = fadd double %sum.01, %8
+  %10 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %10, 100
+  br i1 %exitcond, label %11, label %1
+
+; <label>:11                                      ; preds = %1
+  ret double %9
+}
+
+attributes #0 = { nounwind readonly ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
author	Nadav Rotem <nrotem@apple.com>
	Sun, 14 Apr 2013 05:15:53 +0000 (05:15 +0000)
committer	Nadav Rotem <nrotem@apple.com>
	Sun, 14 Apr 2013 05:15:53 +0000 (05:15 +0000)
lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
lib/Transforms/Vectorize/VecUtils.cpp		patch \| blob \| history
lib/Transforms/Vectorize/VecUtils.h		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/reduction2.ll	[new file with mode: 0644]	patch \| blob