[C++11] Add predecessors(BasicBlock *) / successors(BasicBlock *) iterator ranges.

[oota-llvm.git] / lib / Analysis / ScalarEvolution.cpp
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp

index dad8e07dadb1e8e046baddcb9310a7eaacf66c97..fd5b86b4634bb0e97ef9f712c0b94abde393fc6c 100644 (file)
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1201,6 +1201,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
        return getTruncateOrSignExtend(X, Ty);
    }
  
+  // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
+  if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
+    if (SA->getNumOperands() == 2) {
+      auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
+      auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
+      if (SMul && SC1) {
+        if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
+          const APInt &C1 = SC1->getValue()->getValue();
+          const APInt &C2 = SC2->getValue()->getValue();
+          if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
+              C2.ugt(C1) && C2.isPowerOf2())
+            return getAddExpr(getSignExtendExpr(SC1, Ty),
+                              getSignExtendExpr(SMul, Ty));
+        }
+      }
+    }
+  }
    // If the input value is a chrec scev, and we can prove that the value
    // did not overflow the old, smaller, value, we can sign extend all of the
    // operands (often constants).  This allows analysis of something like
@@ -1292,6 +1309,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
                                 L, AR->getNoWrapFlags());
          }
        }
+      // If Start and Step are constants, check if we can apply this
+      // transformation:
+      // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
+      auto SC1 = dyn_cast<SCEVConstant>(Start);
+      auto SC2 = dyn_cast<SCEVConstant>(Step);
+      if (SC1 && SC2) {
+        const APInt &C1 = SC1->getValue()->getValue();
+        const APInt &C2 = SC2->getValue()->getValue();
+        if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
+            C2.isPowerOf2()) {
+          Start = getSignExtendExpr(Start, Ty);
+          const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
+                                            L, AR->getNoWrapFlags());
+          return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
+        }
+      }
      }
  
    // The cast wasn't folded; create an explicit cast node.
@@ -4409,36 +4442,63 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
    SmallVector<BasicBlock *, 8> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
  
-  // Examine all exits and pick the most conservative values.
-  const SCEV *MaxBECount = getCouldNotCompute();
+  SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
    bool CouldComputeBECount = true;
    BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
-  bool LatchMustExit = false;
-  SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+  const SCEV *MustExitMaxBECount = nullptr;
+  const SCEV *MayExitMaxBECount = nullptr;
+
+  // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
+  // and compute maxBECount.
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
-    ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
+    BasicBlock *ExitBB = ExitingBlocks[i];
+    ExitLimit EL = ComputeExitLimit(L, ExitBB);
+
+    // 1. For each exit that can be computed, add an entry to ExitCounts.
+    // CouldComputeBECount is true only if all exits can be computed.
      if (EL.Exact == getCouldNotCompute())
        // We couldn't compute an exact value for this exit, so
        // we won't be able to compute an exact value for the loop.
        CouldComputeBECount = false;
      else
-      ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
-
-    if (MaxBECount == getCouldNotCompute())
-      MaxBECount = EL.Max;
-    else if (EL.Max != getCouldNotCompute()) {
-      // We cannot take the "min" MaxBECount, because non-unit stride loops may
-      // skip some loop tests. Taking the max over the exits is sufficiently
-      // conservative.  TODO: We could do better taking into consideration
-      // non-latch exits that dominate the latch.
-      if (EL.MustExit && ExitingBlocks[i] == Latch) {
-        MaxBECount = EL.Max;
-        LatchMustExit = true;
+      ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
+
+    // 2. Derive the loop's MaxBECount from each exit's max number of
+    // non-exiting iterations. Partition the loop exits into two kinds:
+    // LoopMustExits and LoopMayExits.
+    //
+    // A LoopMustExit meets two requirements:
+    //
+    // (a) Its ExitLimit.MustExit flag must be set which indicates that the exit
+    // test condition cannot be skipped (the tested variable has unit stride or
+    // the test is less-than or greater-than, rather than a strict inequality).
+    //
+    // (b) It must dominate the loop latch, hence must be tested on every loop
+    // iteration.
+    //
+    // If any computable LoopMustExit is found, then MaxBECount is the minimum
+    // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is
+    // conservatively the maximum EL.Max, where CouldNotCompute is considered
+    // greater than any computable EL.Max.
+    if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch &&
+        DT->dominates(ExitBB, Latch)) {
+      if (!MustExitMaxBECount)
+        MustExitMaxBECount = EL.Max;
+      else {
+        MustExitMaxBECount =
+          getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
+      }
+    } else if (MayExitMaxBECount != getCouldNotCompute()) {
+      if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
+        MayExitMaxBECount = EL.Max;
+      else {
+        MayExitMaxBECount =
+          getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
        }
-      else if (!LatchMustExit)
-        MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
      }
    }
+  const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
+    (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
    return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
  }
  
@@ -4452,13 +4512,12 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
    // lead to the loop header.
    bool MustExecuteLoopHeader = true;
    BasicBlock *Exit = nullptr;
-  for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
-       SI != SE; ++SI)
-    if (!L->contains(*SI)) {
+  for (BasicBlock *Succ : successors(ExitingBlock))
+    if (!L->contains(Succ)) {
        if (Exit) // Multiple exit successors.
          return getCouldNotCompute();
-      Exit = *SI;
-    } else if (*SI != L->getHeader()) {
+      Exit = Succ;
+    } else if (Succ != L->getHeader()) {
        MustExecuteLoopHeader = false;
      }
  
@@ -6135,18 +6194,30 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
  
    // If LHS or RHS is an addrec, check to see if the condition is true in
    // every iteration of the loop.
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
-    if (isLoopEntryGuardedByCond(
-          AR->getLoop(), Pred, AR->getStart(), RHS) &&
-        isLoopBackedgeGuardedByCond(
-          AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
-      return true;
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
-    if (isLoopEntryGuardedByCond(
-          AR->getLoop(), Pred, LHS, AR->getStart()) &&
-        isLoopBackedgeGuardedByCond(
-          AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
-      return true;
+  // If LHS and RHS are both addrec, both conditions must be true in
+  // every iteration of the loop.
+  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+  bool LeftGuarded = false;
+  bool RightGuarded = false;
+  if (LAR) {
+    const Loop *L = LAR->getLoop();
+    if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
+        isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
+      if (!RAR) return true;
+      LeftGuarded = true;
+    }
+  }
+  if (RAR) {
+    const Loop *L = RAR->getLoop();
+    if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
+        isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
+      if (!LAR) return true;
+      RightGuarded = true;
+    }
+  }
+  if (LeftGuarded && RightGuarded)
+    return true;
  
    // Otherwise see what can be done with known constant ranges.
    return isKnownPredicateWithRanges(Pred, LHS, RHS);
@@ -6872,7 +6943,7 @@ struct SCEVCollectTerms {
        : Terms(T) {}
  
    bool follow(const SCEV *S) {
-    if (isa<SCEVUnknown>(S) || isa<SCEVConstant>(S) || isa<SCEVMulExpr>(S)) {
+    if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
        if (!containsUndefs(S))
          Terms.push_back(S);
  
@@ -7059,9 +7130,19 @@ public:
  
    void visitAddExpr(const SCEVAddExpr *Numerator) {
      SmallVector<const SCEV *, 2> Qs, Rs;
+    Type *Ty = Denominator->getType();
+
      for (const SCEV *Op : Numerator->operands()) {
        const SCEV *Q, *R;
        divide(SE, Op, Denominator, &Q, &R);
+
+      // Bail out if types do not match.
+      if (Ty != Q->getType() || Ty != R->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
        Qs.push_back(Q);
        Rs.push_back(R);
      }
@@ -7078,9 +7159,17 @@ public:
  
    void visitMulExpr(const SCEVMulExpr *Numerator) {
      SmallVector<const SCEV *, 2> Qs;
+    Type *Ty = Denominator->getType();
  
      bool FoundDenominatorTerm = false;
      for (const SCEV *Op : Numerator->operands()) {
+      // Bail out if types do not match.
+      if (Ty != Op->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
        if (FoundDenominatorTerm) {
          Qs.push_back(Op);
          continue;
@@ -7093,6 +7182,14 @@ public:
          Qs.push_back(Op);
          continue;
        }
+
+      // Bail out if types do not match.
+      if (Ty != Q->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
        FoundDenominatorTerm = true;
        Qs.push_back(Q);
      }
@@ -7118,6 +7215,15 @@ public:
          cast<SCEVConstant>(Zero)->getValue();
      Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
  
+    if (Remainder->isZero()) {
+      // The Quotient is obtained by replacing Denominator by 1 in Numerator.
+      RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+          cast<SCEVConstant>(One)->getValue();
+      Quotient =
+          SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+      return;
+    }
+
      // Quotient is (Numerator - Remainder) divided by Denominator.
      const SCEV *Q, *R;
      const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
@@ -7139,82 +7245,31 @@ private:
  };
  }
  
-// Find the Greatest Common Divisor of A and B.
-static const SCEV *
-findGCD(ScalarEvolution &SE, const SCEV *A, const SCEV *B) {
-
-  if (const SCEVConstant *CA = dyn_cast<SCEVConstant>(A))
-    if (const SCEVConstant *CB = dyn_cast<SCEVConstant>(B))
-      return SE.getConstant(gcd(CA, CB));
-
-  const SCEV *One = SE.getConstant(A->getType(), 1);
-  if (isa<SCEVConstant>(A) && isa<SCEVUnknown>(B))
-    return One;
-  if (isa<SCEVUnknown>(A) && isa<SCEVConstant>(B))
-    return One;
-
-  const SCEV *Q, *R;
-  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(A)) {
-    SmallVector<const SCEV *, 2> Qs;
-    for (const SCEV *Op : M->operands())
-      Qs.push_back(findGCD(SE, Op, B));
-    return SE.getMulExpr(Qs);
-  }
-  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(B)) {
-    SmallVector<const SCEV *, 2> Qs;
-    for (const SCEV *Op : M->operands())
-      Qs.push_back(findGCD(SE, A, Op));
-    return SE.getMulExpr(Qs);
-  }
-
-  SCEVDivision::divide(SE, A, B, &Q, &R);
-  if (R->isZero())
-    return B;
-
-  SCEVDivision::divide(SE, B, A, &Q, &R);
-  if (R->isZero())
-    return A;
-
-  return One;
-}
-
-// Find the Greatest Common Divisor of all the SCEVs in Terms.
-static const SCEV *
-findGCD(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) {
-  assert(Terms.size() > 0 && "Terms vector is empty");
-
-  const SCEV *GCD = Terms[0];
-  for (const SCEV *T : Terms)
-    GCD = findGCD(SE, GCD, T);
-
-  return GCD;
-}
-
  static bool findArrayDimensionsRec(ScalarEvolution &SE,
                                     SmallVectorImpl<const SCEV *> &Terms,
                                     SmallVectorImpl<const SCEV *> &Sizes) {
-  // The GCD of all Terms is the dimension of the innermost dimension.
-  const SCEV *GCD = findGCD(SE, Terms);
+  int Last = Terms.size() - 1;
+  const SCEV *Step = Terms[Last];
  
    // End of recursion.
-  if (Terms.size() == 1) {
-    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(GCD)) {
+  if (Last == 0) {
+    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
        SmallVector<const SCEV *, 2> Qs;
        for (const SCEV *Op : M->operands())
          if (!isa<SCEVConstant>(Op))
            Qs.push_back(Op);
  
-      GCD = SE.getMulExpr(Qs);
+      Step = SE.getMulExpr(Qs);
      }
  
-    Sizes.push_back(GCD);
+    Sizes.push_back(Step);
      return true;
    }
  
    for (const SCEV *&Term : Terms) {
      // Normalize the terms before the next call to findArrayDimensionsRec.
      const SCEV *Q, *R;
-    SCEVDivision::divide(SE, Term, GCD, &Q, &R);
+    SCEVDivision::divide(SE, Term, Step, &Q, &R);
  
      // Bail out when GCD does not evenly divide one of the terms.
      if (!R->isZero())
@@ -7233,7 +7288,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
      if (!findArrayDimensionsRec(SE, Terms, Sizes))
        return false;
  
-  Sizes.push_back(GCD);
+  Sizes.push_back(Step);
    return true;
  }
  
@@ -7284,13 +7339,46 @@ static inline int numberOfTerms(const SCEV *S) {
    return 1;
  }
  
+static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
+  if (isa<SCEVConstant>(T))
+    return nullptr;
+
+  if (isa<SCEVUnknown>(T))
+    return T;
+
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
+    SmallVector<const SCEV *, 2> Factors;
+    for (const SCEV *Op : M->operands())
+      if (!isa<SCEVConstant>(Op))
+        Factors.push_back(Op);
+
+    return SE.getMulExpr(Factors);
+  }
+
+  return T;
+}
+
+/// Return the size of an element read or written by Inst.
+const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
+  Type *Ty;
+  if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+    Ty = Store->getValueOperand()->getType();
+  else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+    Ty = Load->getType();
+  else
+    return nullptr;
+
+  Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
+  return getSizeOfExpr(ETy, Ty);
+}
+
  /// Second step of delinearization: compute the array dimensions Sizes from the
  /// set of Terms extracted from the memory access function of this SCEVAddRec.
-void ScalarEvolution::findArrayDimensions(
-    SmallVectorImpl<const SCEV *> &Terms,
-    SmallVectorImpl<const SCEV *> &Sizes) const {
+void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
+                                          SmallVectorImpl<const SCEV *> &Sizes,
+                                          const SCEV *ElementSize) const {
  
-  if (Terms.size() < 2)
+  if (Terms.size() < 1 || !ElementSize)
      return;
  
    // Early return when Terms do not contain parameters: we do not delinearize
@@ -7313,20 +7401,37 @@ void ScalarEvolution::findArrayDimensions(
      return numberOfTerms(LHS) > numberOfTerms(RHS);
    });
  
+  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+  // Divide all terms by the element size.
+  for (const SCEV *&Term : Terms) {
+    const SCEV *Q, *R;
+    SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+    Term = Q;
+  }
+
+  SmallVector<const SCEV *, 4> NewTerms;
+
+  // Remove constant factors.
+  for (const SCEV *T : Terms)
+    if (const SCEV *NewT = removeConstantFactors(SE, T))
+      NewTerms.push_back(NewT);
+
    DEBUG({
        dbgs() << "Terms after sorting:\n";
-      for (const SCEV *T : Terms)
+      for (const SCEV *T : NewTerms)
          dbgs() << *T << "\n";
      });
  
-  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
-  bool Res = findArrayDimensionsRec(SE, Terms, Sizes);
-
-  if (!Res) {
+  if (NewTerms.empty() ||
+      !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
      Sizes.clear();
      return;
    }
  
+  // The last element to be pushed into Sizes is the size of an element.
+  Sizes.push_back(ElementSize);
+
    DEBUG({
        dbgs() << "Sizes:\n";
        for (const SCEV *S : Sizes)
@@ -7336,16 +7441,15 @@ void ScalarEvolution::findArrayDimensions(
  
  /// Third step of delinearization: compute the access functions for the
  /// Subscripts based on the dimensions in Sizes.
-const SCEV *SCEVAddRecExpr::computeAccessFunctions(
+void SCEVAddRecExpr::computeAccessFunctions(
      ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts,
      SmallVectorImpl<const SCEV *> &Sizes) const {
  
    // Early exit in case this SCEV is not an affine multivariate function.
    if (Sizes.empty() || !this->isAffine())
-    return nullptr;
+    return;
  
-  const SCEV *Zero = SE.getConstant(this->getType(), 0);
-  const SCEV *Res = this, *Remainder = Zero;
+  const SCEV *Res = this;
    int Last = Sizes.size() - 1;
    for (int i = Last; i >= 0; i--) {
      const SCEV *Q, *R;
@@ -7361,10 +7465,17 @@ const SCEV *SCEVAddRecExpr::computeAccessFunctions(
  
      Res = Q;
  
+    // Do not record the last subscript corresponding to the size of elements in
+    // the array.
      if (i == Last) {
-      // Do not record the last subscript corresponding to the size of elements
-      // in the array.
-      Remainder = R;
+
+      // Bail out if the remainder is too complex.
+      if (isa<SCEVAddRecExpr>(R)) {
+        Subscripts.clear();
+        Sizes.clear();
+        return;
+      }
+
        continue;
      }
  
@@ -7383,7 +7494,6 @@ const SCEV *SCEVAddRecExpr::computeAccessFunctions(
        for (const SCEV *S : Subscripts)
          dbgs() << *S << "\n";
      });
-  return Remainder;
  }
  
  /// Splits the SCEV into two vectors of SCEVs representing the subscripts and
@@ -7435,28 +7545,28 @@ const SCEV *SCEVAddRecExpr::computeAccessFunctions(
  /// asking for the SCEV of the memory access with respect to all enclosing
  /// loops, calling SCEV->delinearize on that and printing the results.
  
-const SCEV *
-SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
-                            SmallVectorImpl<const SCEV *> &Subscripts,
-                            SmallVectorImpl<const SCEV *> &Sizes) const {
+void SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+                                 SmallVectorImpl<const SCEV *> &Subscripts,
+                                 SmallVectorImpl<const SCEV *> &Sizes,
+                                 const SCEV *ElementSize) const {
    // First step: collect parametric terms.
    SmallVector<const SCEV *, 4> Terms;
    collectParametricTerms(SE, Terms);
  
    if (Terms.empty())
-    return nullptr;
+    return;
  
    // Second step: find subscript sizes.
-  SE.findArrayDimensions(Terms, Sizes);
+  SE.findArrayDimensions(Terms, Sizes, ElementSize);
  
    if (Sizes.empty())
-    return nullptr;
+    return;
  
    // Third step: compute the access functions for each subscript.
-  const SCEV *Remainder = computeAccessFunctions(SE, Subscripts, Sizes);
+  computeAccessFunctions(SE, Subscripts, Sizes);
  
-  if (!Remainder || Subscripts.empty())
-    return nullptr;
+  if (Subscripts.empty())
+    return;
  
    DEBUG({
        dbgs() << "succeeded to delinearize " << *this << "\n";
@@ -7469,8 +7579,6 @@ SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
          dbgs() << "[" << *S << "]";
        dbgs() << "\n";
      });
-
-  return Remainder;
  }
  
  //===----------------------------------------------------------------------===//