SCEVExpander: Try hard not to create derived induction variables in other loops

author Arnold Schwaighofer <aschwaighofer@apple.com>

Sat, 15 Feb 2014 17:11:56 +0000 (17:11 +0000)

committer Arnold Schwaighofer <aschwaighofer@apple.com>

Sat, 15 Feb 2014 17:11:56 +0000 (17:11 +0000)
author Arnold Schwaighofer <aschwaighofer@apple.com>
Sat, 15 Feb 2014 17:11:56 +0000 (17:11 +0000)
committer Arnold Schwaighofer <aschwaighofer@apple.com>
Sat, 15 Feb 2014 17:11:56 +0000 (17:11 +0000)
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h

index 4433be000d7746b8128b3ebf58fd11f3b98274e6..408e9152b84b9c21c0ea1235e8e194945db7dd70 100644 (file)
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -260,7 +260,9 @@ namespace llvm {
      PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                         const Loop *L,
                                         Type *ExpandTy,
-                                       Type *IntTy);
+                                       Type *IntTy,
+                                       Type *&TruncTy,
+                                       bool &InvertStep);
      Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                         Type *ExpandTy, Type *IntTy, bool useSubtract);
    };
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp

index ea9de2f5a500f0963263975760a061bcda0753cd..b0676a95fe8eea37478cb6872cc6bb6c0ef444b0 100644 (file)
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1017,6 +1017,54 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
    return IncV;
  }
  
+/// \brief Hoist the addrec instruction chain rooted in the loop phi above the
+/// position. This routine assumes that this is possible (has been checked).
+static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
+                           Instruction *Pos, PHINode *LoopPhi) {
+  do {
+    if (DT->dominates(InstToHoist, Pos))
+      break;
+    // Make sure the increment is where we want it. But don't move it
+    // down past a potential existing post-inc user.
+    InstToHoist->moveBefore(Pos);
+    Pos = InstToHoist;
+    InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
+  } while (InstToHoist != LoopPhi);
+}
+
+/// \brief Check whether we can cheaply express the requested SCEV in terms of
+/// the available PHI SCEV by truncation and/or invertion of the step.
+static bool canBeCheaplyTransformed(ScalarEvolution &SE,
+                                    const SCEVAddRecExpr *Phi,
+                                    const SCEVAddRecExpr *Requested,
+                                    bool &InvertStep) {
+  Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
+  Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
+
+  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
+    return false;
+
+  // Try truncate it if necessary.
+  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
+  if (!Phi)
+    return false;
+
+  // Check whether truncation will help.
+  if (Phi == Requested) {
+    InvertStep = false;
+    return true;
+  }
+
+  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
+  if (SE.getAddExpr(Requested->getStart(),
+                    SE.getNegativeSCEV(Requested)) == Phi) {
+    InvertStep = true;
+    return true;
+  }
+
+  return false;
+}
+
  /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
  /// the base addrec, which is the addrec without any non-loop-dominating
  /// values, and return the PHI.
@@ -1024,49 +1072,84 @@ PHINode *
  SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                          const Loop *L,
                                          Type *ExpandTy,
-                                        Type *IntTy) {
+                                        Type *IntTy,
+                                        Type *&TruncTy,
+                                        bool &InvertStep) {
    assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
  
    // Reuse a previously-inserted PHI, if present.
    BasicBlock *LatchBlock = L->getLoopLatch();
    if (LatchBlock) {
+    PHINode *AddRecPhiMatch = 0;
+    Instruction *IncV = 0;
+    TruncTy = 0;
+    InvertStep = false;
      for (BasicBlock::iterator I = L->getHeader()->begin();
           PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-      if (!SE.isSCEVable(PN->getType()) ||
-          (SE.getEffectiveSCEVType(PN->getType()) !=
-           SE.getEffectiveSCEVType(Normalized->getType())) ||
-          SE.getSCEV(PN) != Normalized)
+      if (!SE.isSCEVable(PN->getType()))
+        continue;
+
+      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
+      if (!PhiSCEV)
          continue;
  
-      Instruction *IncV =
-        cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+      bool IsMatchingSCEV = PhiSCEV == Normalized;
+      Instruction *TempIncV =
+          cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+      // We only handle truncation and inversion of phi recurrences for the
+      // expanded expression if the expanded expression's loop dominates the
+      // loop we insert to. Check now, so we can bail out early.
+      if (!IsMatchingSCEV)
+        if (!IVIncInsertLoop ||
+            !SE.DT->properlyDominates(L->getHeader(),
+                                      IVIncInsertLoop->getHeader()))
+          continue;
  
+      // Check whether we can reuse this PHI node.
        if (LSRMode) {
-        if (!isExpandedAddRecExprPHI(PN, IncV, L))
+        if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
            continue;
-        if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+        if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
            continue;
-      }
-      else {
-        if (!isNormalAddRecExprPHI(PN, IncV, L))
+      } else {
+        if (!isNormalAddRecExprPHI(PN, TempIncV, L))
            continue;
-        if (L == IVIncInsertLoop)
-          do {
-            if (SE.DT->dominates(IncV, IVIncInsertPos))
-              break;
-            // Make sure the increment is where we want it. But don't move it
-            // down past a potential existing post-inc user.
-            IncV->moveBefore(IVIncInsertPos);
-            IVIncInsertPos = IncV;
-            IncV = cast<Instruction>(IncV->getOperand(0));
-          } while (IncV != PN);
        }
+
+      // Stop if we have found an exact match SCEV.
+      if (IsMatchingSCEV) {
+        IncV = TempIncV;
+        TruncTy = 0;
+        InvertStep = false;
+        AddRecPhiMatch = PN;
+        break;
+      }
+
+      // Try whether the phi can be translated into the requested form
+      // (truncated and/or offset by a constant).
+      if ((!TruncTy || InvertStep) &&
+          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
+        // Record the phi node. But don't stop we might find an exact match
+        // later.
+        AddRecPhiMatch = PN;
+        IncV = TempIncV;
+        TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+      }
+    }
+
+    if (AddRecPhiMatch) {
+      // Potentially, move the increment. We have made sure in
+      // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
+      if (L == IVIncInsertLoop)
+        hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+
        // Ok, the add recurrence looks usable.
        // Remember this PHI, even in post-inc mode.
-      InsertedValues.insert(PN);
+      InsertedValues.insert(AddRecPhiMatch);
        // Remember the increment.
        rememberInstruction(IncV);
-      return PN;
+      return AddRecPhiMatch;
      }
    }
  
@@ -1191,7 +1274,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
    // Expand the core addrec. If we need post-loop scaling, force it to
    // expand to an integer type to avoid the need for additional casting.
    Type *ExpandTy = PostLoopScale ? IntTy : STy;
-  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+  // In some cases, we decide to reuse an existing phi node but need to truncate
+  // it and/or invert the step.
+  Type *TruncTy = 0;
+  bool InvertStep = false;
+  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
+                                          TruncTy, InvertStep);
  
    // Accommodate post-inc mode, if necessary.
    Value *Result;
@@ -1232,6 +1320,20 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
      }
    }
  
+  // We have decided to reuse an induction variable of a dominating loop. Apply
+  // truncation and/or invertion of the step.
+  if (TruncTy) {
+    if (Result->getType() != TruncTy) {
+      Result = Builder.CreateTrunc(Result, TruncTy);
+      rememberInstruction(Result);
+    }
+    if (InvertStep) {
+      Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
+                                 Result);
+      rememberInstruction(Result);
+    }
+  }
+
    // Re-apply any non-loop-dominating scale.
    if (PostLoopScale) {
      assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
diff --git a/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll

new file mode 100644 (file)

index 0000000..5506994
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @indvar_expansion(i8* nocapture readonly %rowsptr) {
+entry:
+  br label %for.cond
+
+; SCEVExpander used to create induction variables in the loop %for.cond while
+; expanding the recurrence start value of loop strength reduced values from
+; %vector.body.
+
+; CHECK-LABEL: indvar_expansion
+; CHECK: for.cond:
+; CHECK-NOT: phi i3
+; CHECK: br i1 {{.+}}, label %for.cond
+
+for.cond:
+  %indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ]
+  %cmp = icmp eq i8 undef, 0
+  %indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1
+  br i1 %cmp, label %for.cond, label %for.cond2
+
+for.cond2:
+  br i1 undef, label %for.cond2, label %for.body14.lr.ph
+
+for.body14.lr.ph:
+  %sext = shl i64 %indvars.iv44, 32
+  %0 = ashr exact i64 %sext, 32
+  %1 = sub i64 undef, %indvars.iv44
+  %2 = and i64 %1, 4294967295
+  %3 = add i64 %2, 1
+  %fold = add i64 %1, 1
+  %n.mod.vf = and i64 %fold, 7
+  %n.vec = sub i64 %3, %n.mod.vf
+  %end.idx.rnd.down = add i64 %n.vec, %0
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ]
+  %4 = getelementptr inbounds i8* %rowsptr, i64 %index
+  %5 = bitcast i8* %4 to <4 x i8>*
+  %wide.load = load <4 x i8>* %5, align 1
+  %index.next = add i64 %index, 8
+  %6 = icmp eq i64 %index.next, %end.idx.rnd.down
+  br i1 %6, label %for.end24, label %vector.body
+
+for.end24:
+  ret void
+}
author	Arnold Schwaighofer <aschwaighofer@apple.com>
	Sat, 15 Feb 2014 17:11:56 +0000 (17:11 +0000)
committer	Arnold Schwaighofer <aschwaighofer@apple.com>
	Sat, 15 Feb 2014 17:11:56 +0000 (17:11 +0000)
include/llvm/Analysis/ScalarEvolutionExpander.h		patch \| blob \| history
lib/Analysis/ScalarEvolutionExpander.cpp		patch \| blob \| history
test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll	[new file with mode: 0644]	patch \| blob