Fix LSR to tolerate cases where ScalarEvolution initially
authorDan Gohman <gohman@apple.com>
Fri, 23 Apr 2010 01:55:05 +0000 (01:55 +0000)
committerDan Gohman <gohman@apple.com>
Fri, 23 Apr 2010 01:55:05 +0000 (01:55 +0000)
misses an opportunity to fold add operands, but folds them
after LSR has separated them out. This fixes rdar://7886751.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102157 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Analysis/README.txt
lib/Transforms/Scalar/LoopStrengthReduce.cpp
test/CodeGen/X86/lsr-delayed-fold.ll [new file with mode: 0644]

index c401090272994132732c24dc2da0cc222f58e776..88ea9f11ad785927d935f23b1baa65c3c7a605ff 100644 (file)
@@ -16,3 +16,15 @@ In addition to being much more complicated, it involves i65 arithmetic,
 which is very inefficient when expanded into code.
 
 //===---------------------------------------------------------------------===//
+
+In test/CodeGen/X86/lsr-delayed-fold.ll,
+
+ScalarEvolution is forming this expression:
+
+((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32)))
+
+This could be folded to
+
+(-1 * (trunc i64 undef to i32))
+
+//===---------------------------------------------------------------------===//
index a09bca89976cc3142d3540f807012e8f3428059c..a09b3dc5f8518abe6f08c95dd88086f4a6b0c0c3 100644 (file)
@@ -2060,8 +2060,11 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                            LU.Kind, LU.AccessTy, TLI, SE))
         continue;
 
+      const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+      if (InnerSum->isZero())
+        continue;
       Formula F = Base;
-      F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
+      F.BaseRegs[i] = InnerSum;
       F.BaseRegs.push_back(*J);
       if (InsertFormula(LU, LUIdx, F))
         // If that formula hadn't been seen before, recurse to find more like
diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll
new file mode 100644 (file)
index 0000000..f160c2d
--- /dev/null
@@ -0,0 +1,28 @@
+; RUN: llc -march=x86-64 < %s > /dev/null
+; rdar://7886751
+
+; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y),
+; but LSR should tolerate this.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0"
+
+define fastcc void @formatValue(i64 %arg5) nounwind {
+bb12:                                             ; entry block
+  %t = trunc i64 %arg5 to i32                   ; <i32> [#uses=1]
+  %t13 = sub i64 0, %arg5                       ; <i64> [#uses=1]
+  %t14 = and i64 %t13, 4294967295             ; <i64> [#uses=1]
+  br label %bb15
+
+bb15:                                             ; preds = %bb15, %bb12
+  %t16 = phi i64 [ 0, %bb12 ], [ %t23, %bb15 ] ; <i64> [#uses=2]
+  %t17 = mul i64 %t14, %t16                 ; <i64> [#uses=1]
+  %t18 = add i64 undef, %t17                  ; <i64> [#uses=1]
+  %t19 = trunc i64 %t18 to i32                ; <i32> [#uses=1]
+  %t22 = icmp eq i32 %t19, %t               ; <i1> [#uses=1]
+  %t23 = add i64 %t16, 1                      ; <i64> [#uses=1]
+  br i1 %t22, label %bb24, label %bb15
+
+bb24:                                             ; preds = %bb15
+  unreachable
+}