[x86] Refactor the element insertion logic in the new vector shuffle

[oota-llvm.git] / lib / Analysis / ScalarEvolution.cpp
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp

index 35a825ad0566ac1a3c195ed4d5ed15d4f0421d3e..d1274d78a30575aad9e8d2acbffc5c1262e6c66b 100644 (file)
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1,4 +1,4 @@
-//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -62,6 +62,7 @@
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Analysis/LoopInfo.h"
@@ -113,6 +114,7 @@ VerifySCEV("verify-scev",
  
  INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
                  "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
  INITIALIZE_PASS_DEPENDENCY(LoopInfo)
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
@@ -2061,71 +2063,66 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
      // Okay, if there weren't any loop invariants to be folded, check to see if
      // there are multiple AddRec's with the same loop induction variable being
      // multiplied together.  If so, we can fold them.
+
+    // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+    // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+    //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+    //   ]]],+,...up to x=2n}.
+    // Note that the arguments to choose() are always integers with values
+    // known at compile time, never SCEV objects.
+    //
+    // The implementation avoids pointless extra computations when the two
+    // addrec's are of different length (mathematically, it's equivalent to
+    // an infinite stream of zeros on the right).
+    bool OpsModified = false;
      for (unsigned OtherIdx = Idx+1;
-         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+         OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
           ++OtherIdx) {
-      if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
+      const SCEVAddRecExpr *OtherAddRec =
+        dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+      if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
          continue;
  
-      // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
-      // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
-      //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
-      //   ]]],+,...up to x=2n}.
-      // Note that the arguments to choose() are always integers with values
-      // known at compile time, never SCEV objects.
-      //
-      // The implementation avoids pointless extra computations when the two
-      // addrec's are of different length (mathematically, it's equivalent to
-      // an infinite stream of zeros on the right).
-      bool OpsModified = false;
-      for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-           ++OtherIdx) {
-        const SCEVAddRecExpr *OtherAddRec =
-          dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
-        if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
-          continue;
-
-        bool Overflow = false;
-        Type *Ty = AddRec->getType();
-        bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
-        SmallVector<const SCEV*, 7> AddRecOps;
-        for (int x = 0, xe = AddRec->getNumOperands() +
-               OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
-          const SCEV *Term = getConstant(Ty, 0);
-          for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
-            uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
-            for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
-                   ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
-                 z < ze && !Overflow; ++z) {
-              uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
-              uint64_t Coeff;
-              if (LargerThan64Bits)
-                Coeff = umul_ov(Coeff1, Coeff2, Overflow);
-              else
-                Coeff = Coeff1*Coeff2;
-              const SCEV *CoeffTerm = getConstant(Ty, Coeff);
-              const SCEV *Term1 = AddRec->getOperand(y-z);
-              const SCEV *Term2 = OtherAddRec->getOperand(z);
-              Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
-            }
+      bool Overflow = false;
+      Type *Ty = AddRec->getType();
+      bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+      SmallVector<const SCEV*, 7> AddRecOps;
+      for (int x = 0, xe = AddRec->getNumOperands() +
+             OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
+        const SCEV *Term = getConstant(Ty, 0);
+        for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+          uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+          for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+                 ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+               z < ze && !Overflow; ++z) {
+            uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+            uint64_t Coeff;
+            if (LargerThan64Bits)
+              Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+            else
+              Coeff = Coeff1*Coeff2;
+            const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+            const SCEV *Term1 = AddRec->getOperand(y-z);
+            const SCEV *Term2 = OtherAddRec->getOperand(z);
+            Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
            }
-          AddRecOps.push_back(Term);
-        }
-        if (!Overflow) {
-          const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
-                                                SCEV::FlagAnyWrap);
-          if (Ops.size() == 2) return NewAddRec;
-          Ops[Idx] = NewAddRec;
-          Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
-          OpsModified = true;
-          AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
-          if (!AddRec)
-            break;
          }
+        AddRecOps.push_back(Term);
+      }
+      if (!Overflow) {
+        const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+                                              SCEV::FlagAnyWrap);
+        if (Ops.size() == 2) return NewAddRec;
+        Ops[Idx] = NewAddRec;
+        Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+        OpsModified = true;
+        AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
+        if (!AddRec)
+          break;
        }
-      if (OpsModified)
-        return getMulExpr(Ops);
      }
+    if (OpsModified)
+      return getMulExpr(Ops);
  
      // Otherwise couldn't fold anything into this recurrence.  Move onto the
      // next one.
@@ -3263,7 +3260,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
    // PHI's incoming blocks are in a different loop, in which case doing so
    // risks breaking LCSSA form. Instcombine would normally zap these, but
    // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = SimplifyInstruction(PN, DL, TLI, DT))
+  if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AT))
      if (LI->replacementPreservesLCSSAForm(PN, V))
        return getSCEV(V);
  
@@ -3395,7 +3392,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
      // For a SCEVUnknown, ask ValueTracking.
      unsigned BitWidth = getTypeSizeInBits(U->getType());
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    computeKnownBits(U->getValue(), Zeros, Ones);
+    computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT);
      return Zeros.countTrailingOnes();
    }
  
@@ -3534,7 +3531,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // For a SCEVUnknown, ask ValueTracking.
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    computeKnownBits(U->getValue(), Zeros, Ones, DL);
+    computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT);
      if (Ones == ~Zeros + 1)
        return setUnsignedRange(U, ConservativeResult);
      return setUnsignedRange(U,
@@ -3686,7 +3683,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
      // For a SCEVUnknown, ask ValueTracking.
      if (!U->getValue()->getType()->isIntegerTy() && !DL)
        return setSignedRange(U, ConservativeResult);
-    unsigned NS = ComputeNumSignBits(U->getValue(), DL);
+    unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AT, nullptr, DT);
      if (NS <= 1)
        return setSignedRange(U, ConservativeResult);
      return setSignedRange(U, ConservativeResult.intersectWith(
@@ -3793,7 +3790,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
        unsigned TZ = A.countTrailingZeros();
        unsigned BitWidth = A.getBitWidth();
        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL);
+      computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL,
+                       0, AT, nullptr, DT);
  
        APInt EffectiveMask =
            APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -6315,13 +6313,22 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
  
    BranchInst *LoopContinuePredicate =
      dyn_cast<BranchInst>(Latch->getTerminator());
-  if (!LoopContinuePredicate ||
-      LoopContinuePredicate->isUnconditional())
-    return false;
+  if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
+      isImpliedCond(Pred, LHS, RHS,
+                    LoopContinuePredicate->getCondition(),
+                    LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
+    return true;
  
-  return isImpliedCond(Pred, LHS, RHS,
-                       LoopContinuePredicate->getCondition(),
-                       LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+  // Check conditions due to any @llvm.assume intrinsics.
+  for (auto &CI : AT->assumptions(F)) {
+    if (!DT->dominates(CI, Latch->getTerminator()))
+      continue;
+
+    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+      return true;
+  }
+
+  return false;
  }
  
  /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
@@ -6355,6 +6362,15 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
        return true;
    }
  
+  // Check conditions due to any @llvm.assume intrinsics.
+  for (auto &CI : AT->assumptions(F)) {
+    if (!DT->dominates(CI, L->getHeader()))
+      continue;
+
+    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+      return true;
+  }
+
    return false;
  }
  
@@ -7131,9 +7147,19 @@ public:
  
    void visitAddExpr(const SCEVAddExpr *Numerator) {
      SmallVector<const SCEV *, 2> Qs, Rs;
+    Type *Ty = Denominator->getType();
+
      for (const SCEV *Op : Numerator->operands()) {
        const SCEV *Q, *R;
        divide(SE, Op, Denominator, &Q, &R);
+
+      // Bail out if types do not match.
+      if (Ty != Q->getType() || Ty != R->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
        Qs.push_back(Q);
        Rs.push_back(R);
      }
@@ -7150,9 +7176,17 @@ public:
  
    void visitMulExpr(const SCEVMulExpr *Numerator) {
      SmallVector<const SCEV *, 2> Qs;
+    Type *Ty = Denominator->getType();
  
      bool FoundDenominatorTerm = false;
      for (const SCEV *Op : Numerator->operands()) {
+      // Bail out if types do not match.
+      if (Ty != Op->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
        if (FoundDenominatorTerm) {
          Qs.push_back(Op);
          continue;
@@ -7165,6 +7199,14 @@ public:
          Qs.push_back(Op);
          continue;
        }
+
+      // Bail out if types do not match.
+      if (Ty != Q->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
        FoundDenominatorTerm = true;
        Qs.push_back(Q);
      }
@@ -7190,6 +7232,15 @@ public:
          cast<SCEVConstant>(Zero)->getValue();
      Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
  
+    if (Remainder->isZero()) {
+      // The Quotient is obtained by replacing Denominator by 1 in Numerator.
+      RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+          cast<SCEVConstant>(One)->getValue();
+      Quotient =
+          SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+      return;
+    }
+
      // Quotient is (Numerator - Remainder) divided by Denominator.
      const SCEV *Q, *R;
      const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
@@ -7211,82 +7262,31 @@ private:
  };
  }
  
-// Find the Greatest Common Divisor of A and B.
-static const SCEV *
-findGCD(ScalarEvolution &SE, const SCEV *A, const SCEV *B) {
-
-  if (const SCEVConstant *CA = dyn_cast<SCEVConstant>(A))
-    if (const SCEVConstant *CB = dyn_cast<SCEVConstant>(B))
-      return SE.getConstant(gcd(CA, CB));
-
-  const SCEV *One = SE.getConstant(A->getType(), 1);
-  if (isa<SCEVConstant>(A) && isa<SCEVUnknown>(B))
-    return One;
-  if (isa<SCEVUnknown>(A) && isa<SCEVConstant>(B))
-    return One;
-
-  const SCEV *Q, *R;
-  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(A)) {
-    SmallVector<const SCEV *, 2> Qs;
-    for (const SCEV *Op : M->operands())
-      Qs.push_back(findGCD(SE, Op, B));
-    return SE.getMulExpr(Qs);
-  }
-  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(B)) {
-    SmallVector<const SCEV *, 2> Qs;
-    for (const SCEV *Op : M->operands())
-      Qs.push_back(findGCD(SE, A, Op));
-    return SE.getMulExpr(Qs);
-  }
-
-  SCEVDivision::divide(SE, A, B, &Q, &R);
-  if (R->isZero())
-    return B;
-
-  SCEVDivision::divide(SE, B, A, &Q, &R);
-  if (R->isZero())
-    return A;
-
-  return One;
-}
-
-// Find the Greatest Common Divisor of all the SCEVs in Terms.
-static const SCEV *
-findGCD(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) {
-  assert(Terms.size() > 0 && "Terms vector is empty");
-
-  const SCEV *GCD = Terms[0];
-  for (const SCEV *T : Terms)
-    GCD = findGCD(SE, GCD, T);
-
-  return GCD;
-}
-
  static bool findArrayDimensionsRec(ScalarEvolution &SE,
                                     SmallVectorImpl<const SCEV *> &Terms,
                                     SmallVectorImpl<const SCEV *> &Sizes) {
-  // The GCD of all Terms is the dimension of the innermost dimension.
-  const SCEV *GCD = findGCD(SE, Terms);
+  int Last = Terms.size() - 1;
+  const SCEV *Step = Terms[Last];
  
    // End of recursion.
-  if (Terms.size() == 1) {
-    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(GCD)) {
+  if (Last == 0) {
+    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
        SmallVector<const SCEV *, 2> Qs;
        for (const SCEV *Op : M->operands())
          if (!isa<SCEVConstant>(Op))
            Qs.push_back(Op);
  
-      GCD = SE.getMulExpr(Qs);
+      Step = SE.getMulExpr(Qs);
      }
  
-    Sizes.push_back(GCD);
+    Sizes.push_back(Step);
      return true;
    }
  
    for (const SCEV *&Term : Terms) {
      // Normalize the terms before the next call to findArrayDimensionsRec.
      const SCEV *Q, *R;
-    SCEVDivision::divide(SE, Term, GCD, &Q, &R);
+    SCEVDivision::divide(SE, Term, Step, &Q, &R);
  
      // Bail out when GCD does not evenly divide one of the terms.
      if (!R->isZero())
@@ -7305,7 +7305,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
      if (!findArrayDimensionsRec(SE, Terms, Sizes))
        return false;
  
-  Sizes.push_back(GCD);
+  Sizes.push_back(Step);
    return true;
  }
  
@@ -7381,7 +7381,7 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
    if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
      Ty = Store->getValueOperand()->getType();
    else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
-    Ty = Load->getPointerOperand()->getType();
+    Ty = Load->getType();
    else
      return nullptr;
  
@@ -7395,7 +7395,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
                                            SmallVectorImpl<const SCEV *> &Sizes,
                                            const SCEV *ElementSize) const {
  
-  if (Terms.size() < 1)
+  if (Terms.size() < 1 || !ElementSize)
      return;
  
    // Early return when Terms do not contain parameters: we do not delinearize
@@ -7654,6 +7654,7 @@ ScalarEvolution::ScalarEvolution()
  
  bool ScalarEvolution::runOnFunction(Function &F) {
    this->F = &F;
+  AT = &getAnalysis<AssumptionTracker>();
    LI = &getAnalysis<LoopInfo>();
    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
    DL = DLP ? &DLP->getDataLayout() : nullptr;
@@ -7694,6 +7695,7 @@ void ScalarEvolution::releaseMemory() {
  
  void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesAll();
+  AU.addRequired<AssumptionTracker>();
    AU.addRequiredTransitive<LoopInfo>();
    AU.addRequiredTransitive<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfo>();