From: Michael Zolotukhin
Date: Wed, 12 Mar 2014 21:31:05 +0000 (+0000)
Subject: PR17473:
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=4a0593ccd38cf01419e957e3947ed24cb3e06a07;p=oota-llvm.git

PR17473:
Don't normalize an expression during postinc transformation unless it's
invertible.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203719 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 212e98c7f8d..5317a479598 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #define DEBUG_TYPE "iv-users"
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/ADT/STLExtras.h"
@@ -186,15 +185,34 @@ bool IVUsers::AddUsersImpl(Instruction *I,
 
     if (AddUserToIVUsers) {
       // Okay, we found a user that we cannot reduce.
-      IVUses.push_back(new IVStrideUse(this, User, I));
-      IVStrideUse &NewUse = IVUses.back();
+      IVStrideUse &NewUse = AddUser(User, I);
       // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
       // The regular return value here is discarded; instead of recording
       // it, we just recompute it when we need it.
+      const SCEV *OriginalISE = ISE;
       ISE = TransformForPostIncUse(NormalizeAutodetect, ISE,
                                    User, I,
                                    NewUse.PostIncLoops,
                                    *SE, *DT);
+
+      // PostIncNormalization effectively simplifies the expression under
+      // pre-increment assumptions. Those assumptions (no wrapping) might not
+      // hold for the post-inc value. Catch such cases by making sure the
+      // transformation is invertible.
+      if (OriginalISE != ISE) {
+        const SCEV *DenormalizedISE =
+            TransformForPostIncUse(Denormalize, ISE, User, I,
+                                   NewUse.PostIncLoops, *SE, *DT);
+
+        // If we normalized the expression, but denormalization doesn't give the
+        // original one, discard this user.
+        if (OriginalISE != DenormalizedISE) {
+          DEBUG(dbgs() << "   DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
+                       << *ISE << '\n');
+          IVUses.pop_back();
+          return false;
+        }
+      }
       DEBUG(if (SE->getSCEV(I) != ISE)
               dbgs() << "   NORMALIZED TO: " << *ISE << '\n');
     }
diff --git a/test/Transforms/LoopStrengthReduce/pr17473.ll b/test/Transforms/LoopStrengthReduce/pr17473.ll
new file mode 100644
index 00000000000..e7ebaa8ea61
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr17473.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't normalize IV if it can't be denormalized to the original
+; expression. In this testcase, the normalized expression was denormalized to
+; an expression different from the original, and we were losing sign extension.
+
+; CHECK: [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8
+; CHECK: {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32
+
+@j = common global i32 0, align 4
+@c = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i8 0, align 1
+@d = common global i32 0, align 4
+@i = common global i32 0, align 4
+@e = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1
+@a = common global i32 0, align 4
+@b = common global i16 0, align 2
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+  store i8 0, i8* @h, align 1
+  %0 = load i32* @j, align 4
+  %tobool.i = icmp eq i32 %0, 0
+  %1 = load i32* @d, align 4
+  %cmp3 = icmp sgt i32 %1, -1
+  %.lobit = lshr i32 %1, 31
+  %.lobit.not = xor i32 %.lobit, 1
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %fn3.exit
+  %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ]
+  %conv = sext i8 %inc9 to i32
+  br i1 %tobool.i, label %fn3.exit, label %land.rhs.i
+
+land.rhs.i:                                       ; preds = %for.body
+  store i32 0, i32* @c, align 4
+  br label %fn3.exit
+
+fn3.exit:                                         ; preds = %for.body, %land.rhs.i
+  %inc = add i8 %inc9, 1
+  %cmp = icmp sgt i8 %inc, -1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %fn3.exit
+  %.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0
+  store i32 %conv, i32* @g, align 4
+  store i32 %.lobit.not., i32* @i, align 4
+  store i8 %inc, i8* @h, align 1
+  %conv7 = sext i8 %inc to i32
+  %add = add nsw i32 %conv7, %conv
+  store i32 %add, i32* @e, align 4
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
+  ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
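A note on the failure mode (not part of the patch): the i8 induction variable in the test wraps from 127 to -128 on its last increment, so "post-inc == pre-inc + 1" stops holding once the values are sign-extended to i32. The following standalone C++ sketch models the IR of @main with plain integers; it is illustrative code only, not LLVM's API, and the variable names simply mirror the test (%inc9, %inc, %conv, %conv7).

#include <cstdint>
#include <cstdio>

int main() {
  int8_t inc9 = 0;  // %inc9: pre-increment value of the induction variable
  int8_t inc = 0;   // %inc:  post-increment value
  do {
    inc9 = inc;
    // %inc = add i8 %inc9, 1 -- wraps from 127 to -128 on the last iteration.
    inc = static_cast<int8_t>(inc9 + 1);
  } while (inc > -1);  // %cmp = icmp sgt i8 %inc, -1

  int conv = inc9;   // models %conv  = sext i8 %inc9 to i32  -> 127
  int conv7 = inc;   // models %conv7 = sext i8 %inc  to i32  -> -128

  // Rewriting the post-inc use as "pre-inc + 1" in the wide type, which is the
  // kind of rewrite a non-invertible normalization would license, yields 128
  // instead of -128: the sign extension is lost.
  std::printf("sext(post-inc) = %d, sext(pre-inc) + 1 = %d\n", conv7, conv + 1);
  return 0;
}

The trunc/sext pair that the CHECK lines look for is what preserves the -128 here; dropping it is the "losing sign extension" described in the test comment, and the invertibility check in AddUsersImpl is what prevents that rewrite.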