SLPVectorize: Put horizontal reductions feeding a store under separate flag
author Arnold Schwaighofer <aschwaighofer@apple.com>
Wed, 25 Sep 2013 14:02:32 +0000 (14:02 +0000)
committer Arnold Schwaighofer <aschwaighofer@apple.com>
Wed, 25 Sep 2013 14:02:32 +0000 (14:02 +0000)
Put them under a separate flag for experimentation. They are more likely to
interfere with loop vectorization, which happens later in the pass pipeline.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191371 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Transforms/Vectorize/SLPVectorizer.cpp
test/Transforms/SLPVectorizer/X86/horizontal.ll

index b8e57de53be1d6dd177732b7b141e3dbf4b6ed47..acb15603af4bf067b77922395425341d4493e029 100644 (file)
@@ -54,6 +54,11 @@ static cl::opt<bool>
 ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
                    cl::desc("Attempt to vectorize horizontal reductions"));
 
+static cl::opt<bool> ShouldStartVectorizeHorAtStore(
+    "slp-vectorize-hor-store", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Attempt to vectorize horizontal reductions feeding into a store"));
+
 namespace {
 
 static const unsigned MinVecRegSize = 128;
@@ -2336,20 +2341,20 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
     }
 
     // Try to vectorize horizontal reductions feeding into a store.
-    if (StoreInst *SI = dyn_cast<StoreInst>(it))
-      if (BinaryOperator *BinOp =
-              dyn_cast<BinaryOperator>(SI->getValueOperand())) {
-        HorizontalReduction HorRdx;
-        if (ShouldVectorizeHor &&
-            ((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
-              HorRdx.tryToReduce(R, TTI)) ||
-             tryToVectorize(BinOp, R))) {
-          Changed = true;
-          it = BB->begin();
-          e = BB->end();
-          continue;
+    if (ShouldStartVectorizeHorAtStore)
+      if (StoreInst *SI = dyn_cast<StoreInst>(it))
+        if (BinaryOperator *BinOp =
+                dyn_cast<BinaryOperator>(SI->getValueOperand())) {
+          HorizontalReduction HorRdx;
+          if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+                HorRdx.tryToReduce(R, TTI)) ||
+               tryToVectorize(BinOp, R))) {
+            Changed = true;
+            it = BB->begin();
+            e = BB->end();
+            continue;
+          }
         }
-      }
 
     // Try to vectorize trees that start at compare instructions.
     if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
index 9517066ed2e82729f090284b4de850561b80a781..8f919512ff8dacd1b1292d6eac2b559cabbd2ddf 100644 (file)
@@ -1,4 +1,4 @@
-; RUN: opt -slp-vectorizer -slp-vectorize-hor -S <  %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -S <  %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -15,9 +15,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;   return sum;
 ; }
 
-; CHECK-LABEL: add_red
-; CHECK: fmul <4 x float>
-; CHECK: shufflevector <4 x float>
+; NOSTORE-LABEL: add_red
+; NOSTORE: fmul <4 x float>
+; NOSTORE: shufflevector <4 x float>
 
 define i32 @add_red(float* %A, i32 %n) {
 entry:
@@ -369,6 +369,8 @@ for.end:
 }
 
 
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S <  %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE
+
 ; void foo(double * restrict A, double * restrict B, double * restrict C,
 ;          int n) {
 ;   for (intptr_t i=0; i < n; ++i) {
@@ -376,10 +378,10 @@ for.end:
 ;   }
 ; }
 
-; CHECK-LABEL: store_red_double
-; CHECK: fmul <2 x double>
-; CHECK: extractelement <2 x double>
-; CHECK: extractelement <2 x double>
+; STORE-LABEL: store_red_double
+; STORE: fmul <2 x double>
+; STORE: extractelement <2 x double>
+; STORE: extractelement <2 x double>
 
 define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) {
 entry: