LoopVectorizer: Enable unrolling of conditional stores and the load/store runtime-unrolling heuristic

author Arnold Schwaighofer <aschwaighofer@apple.com>
Sun, 2 Feb 2014 03:12:34 +0000 (03:12 +0000)

committer Arnold Schwaighofer <aschwaighofer@apple.com>
Sun, 2 Feb 2014 03:12:34 +0000 (03:12 +0000)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1f494922b31889d55e987b354129644794205392..930cf7799a2d78cc528599e3718127b7f3ceb009 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -180,16 +180,16 @@ static cl::opt<bool> LoopVectorizeWithBlockFrequency(
  
  // Runtime unroll loops for load/store throughput.
  static cl::opt<bool> EnableLoadStoreRuntimeUnroll(
-    "enable-loadstore-runtime-unroll", cl::init(false), cl::Hidden,
+    "enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden,
      cl::desc("Enable runtime unrolling until load/store ports are saturated"));
  
  /// The number of stores in a loop that are allowed to need predication.
  static cl::opt<unsigned> NumberOfStoresToPredicate(
-    "vectorize-num-stores-pred", cl::init(0), cl::Hidden,
+    "vectorize-num-stores-pred", cl::init(1), cl::Hidden,
      cl::desc("Max number of stores to be predicated behind an if."));
  
  static cl::opt<bool> EnableIndVarRegisterHeur(
-    "enable-ind-var-reg-heur", cl::init(false), cl::Hidden,
+    "enable-ind-var-reg-heur", cl::init(true), cl::Hidden,
      cl::desc("Count the induction variable only once when unrolling"));
  
  static cl::opt<bool> EnableCondStoresVectorization(
diff --git a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
index 0b87e0e74055d2a86d4e82ae8c2ebb5069462580..8843fc2d2b1ad3b58bb0cb7632c8d069069c5de5 100644 (file)
--- a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
+++ b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -47,6 +47,7 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) {
    %sum.03 = phi i32 [ %7, %.lr.ph ], [ 0, %0 ]
    %sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ]
    %sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+  %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
    %2 = getelementptr inbounds i32* %A, i32 %i.02
    %3 = load i32* %2, align 4
    %4 = add nsw i32 %3, %sum.01
@@ -55,6 +56,7 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) {
    %7 = add nsw i32 %3, %sum.03
    %8 = add nsw i32 %3, %sum.04
    %9 = add nsw i32 %3, %sum.05
+  %10 = add nsw i32 %3, %sum.05
    %exitcond = icmp eq i32 %5, %n
    br i1 %exitcond, label %._crit_edge, label %.lr.ph
  
@@ -64,5 +66,6 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) {
    %sum.2.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ]
    %sum.4.lcssa = phi i32 [ 0, %0 ], [ %8, %.lr.ph ]
    %sum.5.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+  %sum.6.lcssa = phi i32 [ 0, %0 ], [ %10, %.lr.ph ]
    ret i32 %sum.0.lcssa
  }
author	Arnold Schwaighofer <aschwaighofer@apple.com>
	Sun, 2 Feb 2014 03:12:34 +0000 (03:12 +0000)
committer	Arnold Schwaighofer <aschwaighofer@apple.com>
	Sun, 2 Feb 2014 03:12:34 +0000 (03:12 +0000)
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopVectorize/ARM/arm-unroll.ll		patch \| blob \| history